diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eeda759ff1..e718b32cb6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,7 @@ repos: - id: clang-format-with-version-check name: clang-format description: Format files with ClangFormat. - entry: bash ./.clang_format.hook -i + entry: bash ./tools/codestyle/clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$ - repo: local @@ -52,7 +52,7 @@ repos: hooks: - id: copyright_checker name: copyright_checker - entry: python ./.copyright.hook + entry: python ./tools/codestyle/copyright.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$ diff --git a/.travis.yml b/.travis.yml index 8c77203092..a406841f6a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,7 +31,7 @@ script: if [[ "$JOB" != "doc" ]]; then exit 0; fi; # For document only if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then exit 0; fi; - if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi; + if [[ "$TRAVIS_BRANCH" != "develop" && ! "$TRAVIS_BRANCH" =~ ^v|release/[[:digit:]]+\.[[:digit:]]+(\.[[:digit:]]+)?(-\S*)?$ ]]; then exit 0; fi; export DEPLOY_DOCS_SH=https://raw.githubusercontent.com/PaddlePaddle/PaddlePaddle.org/master/scripts/deploy/deploy_docs.sh export DOCS_DIR=`pwd` cd .. diff --git a/CMakeLists.txt b/CMakeLists.txt index 4117f07721..23bb27e77b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,8 +61,11 @@ option(EIGEN_USE_THREADS "Compile with multi-threaded Eigen" OFF) option(WITH_ARM_FP16 "Use half precision support on armv8.2-a cpu" OFF) option(WITH_FAST_BUNDLE_TEST "Bundle tests that can be run in a single process together to reduce launch overhead" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF) +option(REPLACE_ENFORCE_GLOG "Replace PADDLE_ENFORCE with glog/CHECK for better debug." OFF) option(WITH_ANAKIN "Compile with Anakin library" OFF) option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) +option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) +option(WITH_SYSTEM_BLAS "Use system blas library" OFF) # CMAKE_BUILD_TYPE if(NOT CMAKE_BUILD_TYPE) @@ -131,6 +134,10 @@ if (NOT DEFINED WITH_MKLDNN) set(WITH_MKLDNN OFF) endif() endif() + +if (REPLACE_ENFORCE_GLOG) + add_definitions("-DREPLACE_ENFORCE_GLOG") +endif() ######################################################################################## include(external/mklml) # download mklml package @@ -153,12 +160,24 @@ include(external/cares) if(WITH_DISTRIBUTE) if(WITH_GRPC) include(external/grpc) + message(STATUS "Use grpc framework.") else() + message(STATUS "Use brpc framework.") include(external/leveldb) include(external/brpc) endif() endif() +if(WITH_BRPC_RDMA) + message(STATUS "Use brpc with rdma.") + if(WITH_GRPC) + message(FATAL_ERROR "Can't use grpc with brpc rdma.") + endif() + if(NOT WITH_DISTRIBUTE) + message(FATAL_ERROR "Can't use brpc rdma in no distribute env.") + endif() +endif() + include(external/snappy) # download snappy include(external/snappystream) include(external/threadpool) @@ -178,7 +197,7 @@ include(inference_lib) # add paddle fluid inference libraries include_directories("${PADDLE_SOURCE_DIR}") -include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") +include_directories("${PADDLE_SOURCE_DIR}/paddle/legacy/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") @@ -222,7 +241,7 @@ add_subdirectory(proto) if(NOT MOBILE_INFERENCE AND NOT WITH_FLUID_ONLY) # "add_subdirectory(go)" should be placed after the following loine, # because it depends on paddle/optimizer. - add_subdirectory(paddle/optimizer) + add_subdirectory(paddle/legacy/optimizer) endif() # "add_subdirectory(paddle)" and "add_subdirectory(python)" should be diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b1b02bcc2f..b878f37a5b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -159,4 +159,4 @@ This will enable VLOG messages generated by `buddy_allocator.{h,cc}` and in the - verbose level 1: [framework](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/framework) - verbose level 3: [operators](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/operators) - verbose level 5: [memory](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/memory), [platform](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/platform) -- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/math) +- verbose level 7: [math](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/math) diff --git a/Dockerfile b/Dockerfile index 752fea5951..48c750358c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,7 +23,7 @@ ENV HOME /root COPY ./paddle/scripts/docker/root/ /root/ RUN apt-get update && \ - apt-get install -y --allow-downgrades \ + apt-get install -y --allow-downgrades patchelf \ git python-pip python-dev python-opencv openssh-server bison \ libnccl2=2.1.2-1+cuda8.0 libnccl-dev=2.1.2-1+cuda8.0 \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ @@ -76,7 +76,8 @@ RUN easy_install -U pip && \ pip install sphinx-rtd-theme==0.1.9 recommonmark RUN pip install pre-commit 'ipython==5.3.0' && \ - pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' + pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ + pip install opencv-python #For docstring checker RUN pip install pylint pytest astroid isort diff --git a/README.md b/README.md index 8d89c6b1ec..eb99ed21d0 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/index_en.html) [![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](http://www.paddlepaddle.org/docs/develop/documentation/zh/getstarted/index_cn.html) -[![Coverage Status](https://coveralls.io/repos/github/PaddlePaddle/Paddle/badge.svg?branch=develop)](https://coveralls.io/github/PaddlePaddle/Paddle?branch=develop) [![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases) [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) @@ -19,6 +18,8 @@ learning to many products at Baidu. Our vision is to enable deep learning for everyone via PaddlePaddle. Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest feature of PaddlePaddle. +### Lastest PaddlePaddle Version: [Fluid](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/fluid) + ## Features - **Flexibility** diff --git a/benchmark/fluid/Dockerfile b/benchmark/fluid/Dockerfile index b9eaca5ee6..707fadb1fa 100644 --- a/benchmark/fluid/Dockerfile +++ b/benchmark/fluid/Dockerfile @@ -1,11 +1,18 @@ FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 + +# Use UBUNTU_MIRROR can speed up apt-get speed. +# ARG UBUNTU_MIRROR +# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi' + RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so -RUN pip install -U pip -RUN pip install -U kubernetes paddlepaddle # IMPORTANT: # Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime. +# exmaple: unset http_proxy && unset https_proxy && python fluid_benchmark.py ... + +RUN pip install -U pip +RUN pip install -U kubernetes paddlepaddle RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python' RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python' @@ -14,9 +21,11 @@ RUN pip uninstall -y paddlepaddle && mkdir /workspace ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root +RUN chmod +x /usr/bin/paddle_k8s ADD *.whl / -RUN pip install /*.whl && rm -f /*.whl && chmod +x /usr/bin/paddle_k8s +RUN pip install /*.whl && rm -f /*.whl ENV LD_LIBRARY_PATH=/usr/local/lib -ADD fluid_benchmark.py recordio_converter.py models/ /workspace/ +ADD fluid_benchmark.py recordio_converter.py args.py recordio_converter.py run.sh run_fluid_benchmark.sh /workspace/ +ADD models/ /workspace/models/ diff --git a/benchmark/fluid/args.py b/benchmark/fluid/args.py index 68a3d42d7a..a79f25ccc6 100644 --- a/benchmark/fluid/args.py +++ b/benchmark/fluid/args.py @@ -122,5 +122,13 @@ def parse_args(): type=str, default="", help='Directory that contains all the training recordio files.') + parser.add_argument( + '--use_inference_transpiler', + action='store_true', + help='If set, use inference transpiler to optimize the program.') + parser.add_argument( + '--no_random', + action='store_true', + help='If set, keep the random seed and do not shuffle the data.') args = parser.parse_args() return args diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py index aa70783ecd..94ea7bd6ac 100644 --- a/benchmark/fluid/fluid_benchmark.py +++ b/benchmark/fluid/fluid_benchmark.py @@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args): return train_program, fluid.default_startup_program() else: raise ValueError( - 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' + 'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER' ) @@ -131,6 +131,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, exe = fluid.Executor(place) exe.run(startup_prog) + # Use inference_transpiler to speedup if not args.use_reader_op: feed_var_list = [ var for var in train_prog.global_block().vars.itervalues() @@ -181,6 +182,10 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc, print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))), # evaluation if not args.no_test and batch_acc and not args.use_reader_op: + if args.use_inference_transpiler: + t = fluid.InferenceTranspiler() + t.transpile(infer_prog, place) + pass_test_acc = test(exe, infer_prog, test_reader, feeder, batch_acc) print(", Test Accuracy: %f" % pass_test_acc) @@ -264,8 +269,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader, break else: loss, = exe.run([avg_loss.name], feed=feeder.feed(data)) - if args.update_method == "pserver": - exe.bcast_params() if args.use_reader_op: num_samples += args.batch_size * args.gpus else: @@ -301,9 +304,20 @@ def print_train_time(start_time, end_time, num_samples): (num_samples, train_elapsed, examples_per_sec)) +def print_paddle_envs(): + print('----------- Configuration envs -----------') + for k in os.environ: + if "PADDLE_" in k: + print "ENV %s:%s" % (k, os.environ[k]) + print('------------------------------------------------') + + def main(): args = parse_args() print_arguments(args) + print_paddle_envs() + if args.no_random: + fluid.default_startup_program().random_seed = 1 # the unique trainer id, starting from 0, needed by trainer # only diff --git a/benchmark/fluid/kube_gen_job.py b/benchmark/fluid/kube_gen_job.py index 9da8a69af1..dfe8b5cdd5 100644 --- a/benchmark/fluid/kube_gen_job.py +++ b/benchmark/fluid/kube_gen_job.py @@ -17,6 +17,7 @@ import copy import argparse import random import os +import copy from kube_templates import pserver, trainer, envs @@ -108,10 +109,9 @@ def gen_job(): tn_container["ports"][0]["containerPort"] = spreadport envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname}) - envs.append({"name": "TRAINERS", "value": str(args.trainers)}) - envs.append({"name": "PSERVERS", "value": str(args.pservers)}) + envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)}) + envs.append({"name": "PADDLE_PSERVERS", "value": str(args.pservers)}) envs.append({"name": "ENTRY", "value": args.entry}) - envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)}) envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)}) # NOTE: these directories below are cluster specific, please modify # this settings before you run on your own cluster. @@ -166,17 +166,23 @@ def gen_job(): tn["spec"]["template"]["spec"]["volumes"] = volumes tn_container["volumeMounts"] = volumeMounts - ps_container["env"] = envs - ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"}) + ps_container["env"] = copy.deepcopy(envs) + ps_container["env"].append({ + "name": "PADDLE_TRAINING_ROLE", + "value": "PSERVER" + }) tn_container["env"] = envs if args.disttype == "pserver": tn_container["env"].append({ - "name": "TRAINING_ROLE", + "name": "PADDLE_TRAINING_ROLE", "value": "TRAINER" }) elif args.disttype == "nccl2" or args.disttype == "local": # NCCL2 have no training role, set to plain WORKER - tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"}) + tn_container["env"].append({ + "name": "PADDLE_TRAINING_ROLE", + "value": "WORKER" + }) os.mkdir(args.jobname) if args.disttype == "pserver": diff --git a/benchmark/fluid/models/machine_translation.py b/benchmark/fluid/models/machine_translation.py index 69541adf6b..17f6b03826 100644 --- a/benchmark/fluid/models/machine_translation.py +++ b/benchmark/fluid/models/machine_translation.py @@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim, return avg_cost, feeding_list -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = np.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - lod_t = core.LoDTensor() - lod_t.set(flattened_data, place) - lod_t.set_lod([lod]) - return lod_t, lod[-1] - - def lodtensor_to_ndarray(lod_tensor): dims = lod_tensor.get_dims() ndarray = np.zeros(shape=dims).astype('float32') diff --git a/benchmark/fluid/models/resnet.py b/benchmark/fluid/models/resnet.py index 9ed1093c54..d44a9c07d3 100644 --- a/benchmark/fluid/models/resnet.py +++ b/benchmark/fluid/models/resnet.py @@ -197,12 +197,12 @@ def get_model(args): optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) batched_train_reader = paddle.batch( - paddle.reader.shuffle( + train_reader if args.no_random else paddle.reader.shuffle( train_reader, buf_size=5120), batch_size=args.batch_size * args.gpus, drop_last=True) batched_test_reader = paddle.batch( - train_reader, batch_size=args.batch_size, drop_last=True) + test_reader, batch_size=args.batch_size, drop_last=True) return avg_cost, inference_program, optimizer, batched_train_reader,\ batched_test_reader, batch_acc diff --git a/benchmark/fluid/models/stacked_dynamic_lstm.py b/benchmark/fluid/models/stacked_dynamic_lstm.py index 211869af4e..3231542a17 100644 --- a/benchmark/fluid/models/stacked_dynamic_lstm.py +++ b/benchmark/fluid/models/stacked_dynamic_lstm.py @@ -125,18 +125,3 @@ def get_model(args): batch_size=args.batch_size) return loss, inference_program, adam, train_reader, test_reader, batch_acc - - -def to_lodtensor(data, place): - seq_lens = [len(seq) for seq in data] - cur_len = 0 - lod = [cur_len] - for l in seq_lens: - cur_len += l - lod.append(cur_len) - flattened_data = numpy.concatenate(data, axis=0).astype("int64") - flattened_data = flattened_data.reshape([len(flattened_data), 1]) - res = fluid.LoDTensor() - res.set(flattened_data, place) - res.set_lod([lod]) - return res diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake index e3b9d94215..6ed51c6484 100644 --- a/cmake/cblas.cmake +++ b/cmake/cblas.cmake @@ -83,18 +83,20 @@ else() set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib) endif() -find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS +if(WITH_SYSTEM_BLAS) + find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS}) -find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS + find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS ${REFERENCE_CBLAS_LIB_SEARCH_PATHS}) -if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) - set(CBLAS_FOUND ON) - set(CBLAS_PROVIDER REFERENCE) - set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) - set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) - add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) - message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY) + set(CBLAS_FOUND ON) + set(CBLAS_PROVIDER REFERENCE) + set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR}) + set(CBLAS_LIBRARIES ${REFERENCE_CBLAS_LIBRARY}) + add_definitions(-DPADDLE_USE_REFERENCE_CBLAS) + message(STATUS "Found reference-cblas (include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})") + endif() endif() if(IOS_USE_VECLIB_FOR_BLAS AND VECLIB_FOUND) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 6a8b15a6b6..e4af34d10e 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -174,3 +174,7 @@ endif(WITH_GOLANG) if(WITH_GRPC) add_definitions(-DPADDLE_WITH_GRPC) endif(WITH_GRPC) + +if(WITH_BRPC_RDMA) + add_definitions(-DPADDLE_WITH_BRPC_RDMA) +endif(WITH_BRPC_RDMA) diff --git a/cmake/external/anakin.cmake b/cmake/external/anakin.cmake index f1cd9c99eb..fb3d8ef8d5 100644 --- a/cmake/external/anakin.cmake +++ b/cmake/external/anakin.cmake @@ -7,7 +7,17 @@ set(ANAKIN_INSTALL_DIR "${THIRD_PARTY_PATH}/install/anakin" CACHE PATH set(ANAKIN_INCLUDE "${ANAKIN_INSTALL_DIR}" CACHE STRING "root of Anakin header files") set(ANAKIN_LIBRARY "${ANAKIN_INSTALL_DIR}" CACHE STRING "path of Anakin library") -set(ANAKIN_COMPILE_EXTRA_FLAGS -Wno-error=unused-variable -Wno-error=format-extra-args -Wno-error=comment -Wno-error=format -Wno-error=switch -Wno-error=return-type -Wno-error=non-virtual-dtor -Wno-reorder -Wno-error=cpp) +set(ANAKIN_COMPILE_EXTRA_FLAGS + -Wno-error=unused-variable -Wno-unused-variable + -Wno-error=format-extra-args -Wno-format-extra-args + -Wno-error=comment -Wno-comment + -Wno-error=format -Wno-format + -Wno-error=switch -Wno-switch + -Wno-error=return-type -Wno-return-type + -Wno-error=non-virtual-dtor -Wno-non-virtual-dtor + -Wno-sign-compare + -Wno-reorder + -Wno-error=cpp) set(ANAKIN_LIBRARY_URL "https://github.com/pangge/Anakin/releases/download/3.0/anakin_release_simple.tar.gz") @@ -26,13 +36,15 @@ function(fetch_include_recursively root_dir) endforeach() endfunction() -# download library -message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}") -execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") -execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*") -execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}") -execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") -execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz") +if (NOT EXISTS "${ANAKIN_INSTALL_DIR}") + # download library + message(STATUS "Download Anakin library from ${ANAKIN_LIBRARY_URL}") + execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") + execute_process(COMMAND bash -c "rm -rf ${ANAKIN_INSTALL_DIR}/*") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; wget -q ${ANAKIN_LIBRARY_URL}") + execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_INSTALL_DIR}") + execute_process(COMMAND bash -c "cd ${ANAKIN_INSTALL_DIR}; tar xzf anakin_release_simple.tar.gz") +endif() if (WITH_ANAKIN) message(STATUS "Anakin for inference is enabled") diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index 8e2c913b2c..30b227b645 100644 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -14,6 +14,15 @@ INCLUDE(ExternalProject) +find_library(SSL_LIBRARY NAMES ssl) +ADD_LIBRARY(ssl SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET ssl PROPERTY IMPORTED_LOCATION ${SSL_LIBRARY}) + +find_library(CRYPTO_LIBRARY NAMES crypto) +ADD_LIBRARY(crypto SHARED IMPORTED GLOBAL) +SET_PROPERTY(TARGET crypto PROPERTY IMPORTED_LOCATION ${CRYPTO_LIBRARY}) + + SET(BRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/brpc) SET(BRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/brpc) SET(BRPC_INCLUDE_DIR "${BRPC_INSTALL_DIR}/include" CACHE PATH "brpc include directory." FORCE) @@ -22,14 +31,14 @@ SET(BRPC_LIBRARIES "${BRPC_INSTALL_DIR}/lib/libbrpc.a" CACHE FILEPATH "brpc libr INCLUDE_DIRECTORIES(${BRPC_INCLUDE_DIR}) # Reference https://stackoverflow.com/questions/45414507/pass-a-list-of-prefix-paths-to-externalproject-add-in-cmake-args -set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf") +set(prefix_path "${THIRD_PARTY_PATH}/install/gflags|${THIRD_PARTY_PATH}/install/leveldb|${THIRD_PARTY_PATH}/install/snappy|${THIRD_PARTY_PATH}/install/gtest|${THIRD_PARTY_PATH}/install/protobuf|${THIRD_PARTY_PATH}/install/zlib") # If minimal .a is need, you can set WITH_DEBUG_SYMBOLS=OFF ExternalProject_Add( extern_brpc ${EXTERNAL_PROJECT_LOG_ARGS} - GIT_REPOSITORY "https://github.com/brpc/brpc" - GIT_TAG "6d153dd7ff00f960ae6895c9c5fff0ce9f07aff2" + GIT_REPOSITORY "https://github.com/gongweibao/brpc" + GIT_TAG "7dc04defad1fd4173aae170c3fcbde131b65155a" PREFIX ${BRPC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} @@ -42,6 +51,8 @@ ExternalProject_Add( -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_PREFIX_PATH=${prefix_path} -DBRPC_WITH_GLOG=ON + -DIOBUF_WITH_HUGE_BLOCK=ON + -DBRPC_WITH_RDMA=${WITH_BRPC_RDMA} ${EXTERNAL_OPTIONAL_ARGS} LIST_SEPARATOR | CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${BRPC_INSTALL_DIR} @@ -49,7 +60,7 @@ ExternalProject_Add( -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) -ADD_DEPENDENCIES(extern_brpc protobuf leveldb gflags glog gtest snappy) +ADD_DEPENDENCIES(extern_brpc protobuf ssl crypto leveldb gflags glog gtest snappy) ADD_LIBRARY(brpc STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET brpc PROPERTY IMPORTED_LOCATION ${BRPC_LIBRARIES}) ADD_DEPENDENCIES(brpc extern_brpc) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index ffdf91a354..85f40585da 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -40,12 +40,12 @@ ExternalProject_Add( # NOTE(wuyi): # this package is generated by following steps: # 1. git clone -b v1.8.x https://github.com/grpc/grpc.git - # 2. submodule update --init + # 2. git submodule update --init # 3. keep only zlib, cares, protobuf, boringssl under "third_party", # checkout and clean other dirs under third_party # 4. remove .git, and package the directory. - URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.8.x.tar.gz" - URL_MD5 "c9c58ee7d0e8929a63155af6a2ecdbd0" + URL "http://paddlepaddledeps.bj.bcebos.com/grpc-v1.10.x.tar.gz" + URL_MD5 "1f268a2aff6759839dccd256adcc91cf" PREFIX ${GRPC_SOURCES_DIR} UPDATE_COMMAND "" CONFIGURE_COMMAND "" diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake index 25c07850dd..20dda35c5c 100644 --- a/cmake/external/mkldnn.cmake +++ b/cmake/external/mkldnn.cmake @@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML") ELSE() MESSAGE(FATAL_ERROR "Should enable MKLML when build MKLDNN") ENDIF() -SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result") +SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result") +SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value") SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}") SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}") ExternalProject_Add( @@ -53,7 +54,7 @@ ExternalProject_Add( ${EXTERNAL_PROJECT_LOG_ARGS} DEPENDS ${MKLDNN_DEPENDS} GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git" - GIT_TAG "db3424ad44901513c03a1ea31ccaacdf633fbe9f" + GIT_TAG "a29d8487a63afca3d5b8c5bbdbb473cf8ccc6e51" PREFIX ${MKLDNN_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR} diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 4a49a92f2b..ce6a88b51d 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -114,7 +114,12 @@ INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}) SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c) FILE(WRITE ${dummyfile} "const char *dummy_cblas = \"${dummyfile}\";") ADD_LIBRARY(cblas STATIC ${dummyfile}) -TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES}) + +IF("${CBLAS_PROVIDER}" STREQUAL "MKLML") + TARGET_LINK_LIBRARIES(cblas dynload_mklml) +ELSE() + TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES}) +ENDIF("${CBLAS_PROVIDER}" STREQUAL "MKLML") IF(NOT ${CBLAS_FOUND}) ADD_DEPENDENCIES(cblas extern_openblas) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 0e2df86c19..eafb11b6f2 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -96,6 +96,20 @@ if(NOT APPLE AND NOT ANDROID) set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") endif(NOT APPLE AND NOT ANDROID) +set_property(GLOBAL PROPERTY FLUID_MODULES "") +# find all fluid modules is used for paddle fluid static library +# for building inference libs +function(find_fluid_modules TARGET_NAME) + get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(FIND "${__target_path}" "fluid" pos) + if(pos GREATER 1) + get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) + set(fluid_modules ${fluid_modules} ${TARGET_NAME}) + set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") + endif() +endfunction(find_fluid_modules) + function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) @@ -195,6 +209,15 @@ function(cc_library TARGET_NAME) list(REMOVE_ITEM cc_library_DEPS warpctc) add_dependencies(${TARGET_NAME} warpctc) endif() + # Only deps libmklml.so, not link + if("${cc_library_DEPS};" MATCHES "mklml;") + list(REMOVE_ITEM cc_library_DEPS mklml) + if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml") + list(APPEND cc_library_DEPS dynload_mklml) + endif() + add_dependencies(${TARGET_NAME} mklml) + target_link_libraries(${TARGET_NAME} "-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed") + endif() target_link_libraries(${TARGET_NAME} ${cc_library_DEPS}) add_dependencies(${TARGET_NAME} ${cc_library_DEPS}) endif() @@ -234,13 +257,14 @@ function(cc_test TARGET_NAME) set(multiValueArgs SRCS DEPS ARGS) cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_executable(${TARGET_NAME} ${cc_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog) + target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_test(NAME ${TARGET_NAME} COMMAND ${TARGET_NAME} ${cc_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) if (${cc_test_SERIAL}) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) endif() endif() endfunction(cc_test) @@ -300,11 +324,12 @@ function(nv_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS}) - target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) - add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main memory gtest gflags glog) + target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) + add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog) add_test(${TARGET_NAME} ${TARGET_NAME}) if (nv_test_SERIAL) set_property(TEST ${TARGET_NAME} PROPERTY SERIAL 1) + set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true) endif() endif() endfunction(nv_test) @@ -552,7 +577,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS ARGS ENVS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} + COMMAND env FLAGS_init_allocated_mem=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS} ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index cd44fe2542..c697971323 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -12,19 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -set_property(GLOBAL PROPERTY FLUID_MODULES "") -# find all fluid modules is used for paddle fluid static library -function(find_fluid_modules TARGET_NAME) - get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) - string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) - string(FIND "${__target_path}" "fluid" pos) - if(pos GREATER 1) - get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES) - set(fluid_modules ${fluid_modules} ${TARGET_NAME}) - set_property(GLOBAL PROPERTY FLUID_MODULES "${fluid_modules}") - endif() -endfunction(find_fluid_modules) - # make package for paddle fluid shared and static library function(copy TARGET) set(options "") @@ -149,21 +136,33 @@ copy(memory_lib DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail ) -set(module "inference") -copy(inference_lib DEPS paddle_fluid_shared paddle_fluid - SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.* - DSTS ${dst_dir}/${module} ${dst_dir}/${module} -) +set(inference_deps paddle_fluid_shared paddle_fluid) if(WITH_CONTRIB) - set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference") - copy(contrib_inference_lib DEPS paddle_inference_api + message(STATUS "installing contrib") + set(contrib_dst_dir "${FLUID_INSTALL_DIR}/contrib/inference") + if (WITH_ANAKIN AND WITH_GPU) + copy(contrib_anakin_inference_lib DEPS paddle_inference_api inference_anakin_api + SRCS + ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libinference_anakin_api* # compiled anakin api + ${PADDLE_BINARY_DIR}/third_party/install/anakin/*.tar.gz # anakin release + DSTS ${contrib_dst_dir}/anakin ${contrib_dst_dir}/anakin) + list(APPEND inference_deps contrib_anakin_inference_lib) + endif() + + copy(contrib_inference_lib DEPS paddle_inference_api paddle_inference_api_shared SRCS ${PADDLE_SOURCE_DIR}/paddle/contrib/inference/paddle_inference_api.h - ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api.* - DSTS ${contrib_dst_dir} ${contrib_dst_dir} - ) + ${PADDLE_BINARY_DIR}/paddle/contrib/inference/libpaddle_inference_api* + DSTS ${contrib_dst_dir} ${contrib_dst_dir}) + list(APPEND inference_deps contrib_inference_lib) endif() +set(module "inference") +copy(inference_lib DEPS ${inference_deps} + SRCS ${src_dir}/${module}/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/inference/libpaddle_fluid.* + DSTS ${dst_dir}/${module} ${dst_dir}/${module} +) + set(module "platform") copy(platform_lib DEPS profiler_py_proto SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h diff --git a/cmake/version.cmake b/cmake/version.cmake index cde650128a..79b8e8ac49 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -1,16 +1,21 @@ # Get the latest git tag. set(PADDLE_VERSION $ENV{PADDLE_VERSION}) set(tmp_version "HEAD") +set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?") +set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( - COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} + COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always ${tmp_version} WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) if (NOT ${GIT_RESULT}) # Check the tag is a correct version - if (${GIT_TAG_NAME} MATCHES "v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?") + if (${GIT_TAG_NAME} MATCHES "${COMMIT_VERSION_REGEX}") + # if no tag was found, set PADDLE_VERSION to latest + set(PADDLE_VERSION "latest") + elseif (${GIT_TAG_NAME} MATCHES "v${TAG_VERSION_REGEX}") string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME}) else() # otherwise, get the previous git tag name. set(tmp_version "${GIT_TAG_NAME}~1") diff --git a/doc/about/about_us.rst b/doc/about/about_us.rst new file mode 100644 index 0000000000..f67d8b8130 --- /dev/null +++ b/doc/about/about_us.rst @@ -0,0 +1,53 @@ +========= +关于我们 +========= + +什么是PaddlePaddle +-------------------- + +- PaddlePaddle是百度自主研发并开源的深度学习框架,它能够让开发者和企业安全、快速地实现自己的AI想法 + +- 项目团队汇聚了全球顶级的深度学习科学家,致力于为开发者和企业提供最好的深度学习研发体验 + +- 框架具有易学、易用、安全、高效四大特性,是最适合中国开发者和企业的深度学习工具 + +PaddlePaddle的技术特色 +------------------------- + +- 新一代深度学习框架: PaddlePaddle是基于“深度学习编程语言”的新一代深度学习框架,在保证性能的同时,极大的提升了框架对模型的表达能力,能够描述任意潜在可能出现的模型 + +- 对大规模计算更加友好:经过百度内多种大规模计算业务的打磨,PaddlePaddle在分布式计算上表现优异,基于EDL技术能够节约大量计算资源,同时也能支持大规模稀疏模型的训练 + +- 提供可视化的深度学习:通过Visual DL可以帮助开发者方便的观测训练整体趋势、数据样本质量和中间结果、参数分布和变化趋势、以及模型的结构,帮助开发者更便捷的完成编程过程 + +提供基于PaddlePaddle的教育体系 +-------------------------------- + +- 深度学习课程:百度与中国市场顶级的教育、培训机构共同开发了深度学习精品课程以及学习教材,帮助开发者从零掌握深度学习 + +- 深度学习实训:对于目的是科研和学习的用户,PaddlePaddle提供了无需安装、线上运行的开发环境,并提供算法、算力、数据支持 + +- 线下培训:提供丰富、高质量的线下教育活动,如青年教师培训、线下实战营、沙龙等多种形式的培训和交流 + + +提供基于PaddlePaddle的AI服务 +------------------------------ + +- EadyDL:可以帮助零算法基础的企业快速完成一个深度学习任务,只需少量的数据即可得到优质的模型 + +- AI市场:提供标准化的AI 能力、产品的交易机制,帮助企业快速找到所需,有效开展AI业务 + +- 深度学习竞赛: PaddlePaddle汇聚顶尖深度学习开发者,企业可以发布自己的商业问题,通过竞赛方式快速找到最优的解决方案 + +你对PaddlePaddle有任何的问题都可以通过以下方式联系到我们 +----------------------------------------------------------- + +- 学习/使用问题:可以在 `PaddlePaddle开源社区 `_,以及 `PaddlePaddle中文社区 `_ 向我们反馈 + +- 对PaddlePaddle框架发展的建议:可发送邮件至Paddle-better@baidu.com + +我们期待与你一起打造世界顶级深度学习框架,共同推动AI技术的进步 + + + +PaddlePaddle团队 diff --git a/doc/fluid/api/average.rst b/doc/fluid/api/average.rst new file mode 100644 index 0000000000..496f5b2987 --- /dev/null +++ b/doc/fluid/api/average.rst @@ -0,0 +1,16 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +============= +fluid.average +============= + +.. _api_fluid_average_WeightedAverage: + +WeightedAverage +--------------- + +.. autoclass:: paddle.fluid.average.WeightedAverage + :members: + :noindex: + diff --git a/doc/fluid/api/backward.rst b/doc/fluid/api/backward.rst new file mode 100644 index 0000000000..115e0d24b3 --- /dev/null +++ b/doc/fluid/api/backward.rst @@ -0,0 +1,23 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +============== +fluid.backward +============== + +.. _api_fluid_backward_append_backward: + +append_backward +--------------- + +.. autofunction:: paddle.fluid.backward.append_backward + :noindex: + +.. _api_fluid_backward_calc_gradient: + +calc_gradient +------------- + +.. autofunction:: paddle.fluid.backward.calc_gradient + :noindex: + diff --git a/doc/fluid/api/clip.rst b/doc/fluid/api/clip.rst index 3ba096388f..aeefbb95a4 100644 --- a/doc/fluid/api/clip.rst +++ b/doc/fluid/api/clip.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -==== -clip -==== +========== +fluid.clip +========== + +.. _api_fluid_clip_ErrorClipByValue: ErrorClipByValue ---------------- @@ -12,6 +14,8 @@ ErrorClipByValue :members: :noindex: +.. _api_fluid_clip_GradientClipByValue: + GradientClipByValue ------------------- @@ -19,6 +23,8 @@ GradientClipByValue :members: :noindex: +.. _api_fluid_clip_GradientClipByNorm: + GradientClipByNorm ------------------ @@ -26,6 +32,8 @@ GradientClipByNorm :members: :noindex: +.. _api_fluid_clip_GradientClipByGlobalNorm: + GradientClipByGlobalNorm ------------------------ @@ -33,15 +41,3 @@ GradientClipByGlobalNorm :members: :noindex: -append_gradient_clip_ops ------------------------- - -.. autofunction:: paddle.fluid.clip.append_gradient_clip_ops - :noindex: - -error_clip_callback -------------------- - -.. autofunction:: paddle.fluid.clip.error_clip_callback - :noindex: - diff --git a/doc/fluid/api/data.rst b/doc/fluid/api/data.rst deleted file mode 100644 index b56c7332cc..0000000000 --- a/doc/fluid/api/data.rst +++ /dev/null @@ -1,10 +0,0 @@ -================================== -Data Reader Interface and DataSets -================================== - -.. toctree:: - :maxdepth: 1 - - data/data_reader.rst - data/image.rst - data/dataset.rst diff --git a/doc/fluid/api/data_feeder.rst b/doc/fluid/api/data_feeder.rst index 3df5c0307f..11d2890f5b 100644 --- a/doc/fluid/api/data_feeder.rst +++ b/doc/fluid/api/data_feeder.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -=========== -data_feeder -=========== +================= +fluid.data_feeder +================= + +.. _api_fluid_data_feeder_DataFeeder: DataFeeder ---------- diff --git a/doc/fluid/api/detection.rst b/doc/fluid/api/detection.rst deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/doc/fluid/api/evaluator.rst b/doc/fluid/api/evaluator.rst deleted file mode 100644 index c0dc9a0d1d..0000000000 --- a/doc/fluid/api/evaluator.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` - !DO NOT EDIT THIS FILE MANUALLY! - -========= -evaluator -========= - diff --git a/doc/fluid/api/executor.rst b/doc/fluid/api/executor.rst index f67a14c49f..db2842e7f2 100644 --- a/doc/fluid/api/executor.rst +++ b/doc/fluid/api/executor.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -======== -executor -======== +============== +fluid.executor +============== + +.. _api_fluid_executor_Executor: Executor -------- @@ -12,24 +14,32 @@ Executor :members: :noindex: +.. _api_fluid_executor_global_scope: + global_scope ------------ .. autofunction:: paddle.fluid.executor.global_scope :noindex: +.. _api_fluid_executor_scope_guard: + scope_guard ----------- .. autofunction:: paddle.fluid.executor.scope_guard :noindex: -switch_scope ------------- +.. _api_fluid_executor__switch_scope: + +_switch_scope +------------- -.. autofunction:: paddle.fluid.executor.switch_scope +.. autofunction:: paddle.fluid.executor._switch_scope :noindex: +.. _api_fluid_executor_fetch_var: + fetch_var --------- diff --git a/doc/fluid/api/fluid.rst b/doc/fluid/api/fluid.rst new file mode 100644 index 0000000000..51cdfe0c2e --- /dev/null +++ b/doc/fluid/api/fluid.rst @@ -0,0 +1,378 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +===== +fluid +===== + +.. _api_fluid_Block: + +Block +----- + +.. autoclass:: paddle.fluid.Block + :members: + :noindex: + +.. _api_fluid_Variable: + +Variable +-------- + +.. autoclass:: paddle.fluid.Variable + :members: + :noindex: + +.. _api_fluid_Program: + +Program +------- + +.. autoclass:: paddle.fluid.Program + :members: + :noindex: + +.. _api_fluid_Operator: + +Operator +-------- + +.. autoclass:: paddle.fluid.Operator + :members: + :noindex: + +.. _api_fluid_default_startup_program: + +default_startup_program +----------------------- + +.. autofunction:: paddle.fluid.default_startup_program + :noindex: + +.. _api_fluid_default_main_program: + +default_main_program +-------------------- + +.. autofunction:: paddle.fluid.default_main_program + :noindex: + +.. _api_fluid_program_guard: + +program_guard +------------- + +.. autofunction:: paddle.fluid.program_guard + :noindex: + +.. _api_fluid_get_var: + +get_var +------- + +.. autofunction:: paddle.fluid.get_var + :noindex: + +.. _api_fluid_Executor: + +Executor +-------- + +.. autoclass:: paddle.fluid.Executor + :members: + :noindex: + +.. _api_fluid_global_scope: + +global_scope +------------ + +.. autofunction:: paddle.fluid.global_scope + :noindex: + +.. _api_fluid_scope_guard: + +scope_guard +----------- + +.. autofunction:: paddle.fluid.scope_guard + :noindex: + +.. _api_fluid__switch_scope: + +_switch_scope +------------- + +.. autofunction:: paddle.fluid._switch_scope + :noindex: + +.. _api_fluid_fetch_var: + +fetch_var +--------- + +.. autofunction:: paddle.fluid.fetch_var + :noindex: + +.. _api_fluid_Go: + +Go +-- + +.. autoclass:: paddle.fluid.Go + :members: + :noindex: + +.. _api_fluid_make_channel: + +make_channel +------------ + +.. autofunction:: paddle.fluid.make_channel + :noindex: + +.. _api_fluid_channel_send: + +channel_send +------------ + +.. autofunction:: paddle.fluid.channel_send + :noindex: + +.. _api_fluid_channel_recv: + +channel_recv +------------ + +.. autofunction:: paddle.fluid.channel_recv + :noindex: + +.. _api_fluid_channel_close: + +channel_close +------------- + +.. autofunction:: paddle.fluid.channel_close + :noindex: + +.. _api_fluid_Select: + +Select +------ + +.. autoclass:: paddle.fluid.Select + :members: + :noindex: + +.. _api_fluid_Trainer: + +Trainer +------- + +.. autoclass:: paddle.fluid.Trainer + :members: + :noindex: + +.. _api_fluid_BeginEpochEvent: + +BeginEpochEvent +--------------- + +.. autoclass:: paddle.fluid.BeginEpochEvent + :members: + :noindex: + +.. _api_fluid_EndEpochEvent: + +EndEpochEvent +------------- + +.. autoclass:: paddle.fluid.EndEpochEvent + :members: + :noindex: + +.. _api_fluid_BeginStepEvent: + +BeginStepEvent +-------------- + +.. autoclass:: paddle.fluid.BeginStepEvent + :members: + :noindex: + +.. _api_fluid_EndStepEvent: + +EndStepEvent +------------ + +.. autoclass:: paddle.fluid.EndStepEvent + :members: + :noindex: + +.. _api_fluid_CheckpointConfig: + +CheckpointConfig +---------------- + +.. autoclass:: paddle.fluid.CheckpointConfig + :members: + :noindex: + +.. _api_fluid_Inferencer: + +Inferencer +---------- + +.. autoclass:: paddle.fluid.Inferencer + :members: + :noindex: + +.. _api_fluid_DistributeTranspiler: + +DistributeTranspiler +-------------------- + +.. autoclass:: paddle.fluid.DistributeTranspiler + :members: + :noindex: + +.. _api_fluid_memory_optimize: + +memory_optimize +--------------- + +.. autofunction:: paddle.fluid.memory_optimize + :noindex: + +.. _api_fluid_release_memory: + +release_memory +-------------- + +.. autofunction:: paddle.fluid.release_memory + :noindex: + +.. _api_fluid_ParallelExecutor: + +ParallelExecutor +---------------- + +.. autoclass:: paddle.fluid.ParallelExecutor + :members: + :noindex: + +.. _api_fluid_ExecutionStrategy: + +ExecutionStrategy +----------------- + +.. autoclass:: paddle.fluid.ExecutionStrategy + :members: + :noindex: + +.. _api_fluid_BuildStrategy: + +BuildStrategy +------------- + +.. autoclass:: paddle.fluid.BuildStrategy + :members: + :noindex: + +.. _api_fluid_create_lod_tensor: + +create_lod_tensor +----------------- + +.. autofunction:: paddle.fluid.create_lod_tensor + :noindex: + +.. _api_fluid_create_random_int_lodtensor: + +create_random_int_lodtensor +--------------------------- + +.. autofunction:: paddle.fluid.create_random_int_lodtensor + :noindex: + +.. _api_fluid_LoDTensor: + +LoDTensor +--------- + +.. autoclass:: paddle.fluid.LoDTensor + :members: + :noindex: + +.. _api_fluid_CPUPlace: + +CPUPlace +-------- + +.. autoclass:: paddle.fluid.CPUPlace + :members: + :noindex: + +.. _api_fluid_CUDAPlace: + +CUDAPlace +--------- + +.. autoclass:: paddle.fluid.CUDAPlace + :members: + :noindex: + +.. _api_fluid_CUDAPinnedPlace: + +CUDAPinnedPlace +--------------- + +.. autoclass:: paddle.fluid.CUDAPinnedPlace + :members: + :noindex: + +.. _api_fluid_Tensor: + +Tensor +------ + +.. autoclass:: paddle.fluid.Tensor + :members: + :noindex: + +.. _api_fluid_ParamAttr: + +ParamAttr +--------- + +.. autoclass:: paddle.fluid.ParamAttr + :members: + :noindex: + +.. _api_fluid_WeightNormParamAttr: + +WeightNormParamAttr +------------------- + +.. autoclass:: paddle.fluid.WeightNormParamAttr + :members: + :noindex: + +.. _api_fluid_DataFeeder: + +DataFeeder +---------- + +.. autoclass:: paddle.fluid.DataFeeder + :members: + :noindex: + +.. _api_fluid_Scope: + +Scope +----- + +.. autoclass:: paddle.fluid.Scope + :members: + :noindex: + diff --git a/doc/fluid/api/gen_doc.py b/doc/fluid/api/gen_doc.py index 89ab880301..02efce2bf8 100644 --- a/doc/fluid/api/gen_doc.py +++ b/doc/fluid/api/gen_doc.py @@ -29,19 +29,27 @@ def parse_arg(): class DocGenerator(object): - def __init__(self, module_name, stream=sys.stdout): + def __init__(self, module_name=None, stream=sys.stdout): + if module_name == "": + module_name = None self.stream = stream - self.module_name = module_name - if not hasattr(fluid, module_name): - raise ValueError("Cannot find fluid.{0}".format(module_name)) + if module_name is None: + self.module_name = "fluid" else: - self.module = getattr(fluid, module_name) + self.module_name = "fluid." + module_name + if module_name is None: + self.module = fluid + else: + if not hasattr(fluid, module_name): + raise ValueError("Cannot find fluid.{0}".format(module_name)) + else: + self.module = getattr(fluid, module_name) self.stream.write('''.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! ''') - self._print_header_(module_name, dot='=', is_title=True) + self._print_header_(self.module_name, dot='=', is_title=True) def print_submodule(self, submodule_name): submodule = getattr(self.module, submodule_name) @@ -60,25 +68,29 @@ class DocGenerator(object): self._print_header_(name, dot='=', is_title=False) def print_item(self, name): - item = getattr(self.module, name) + item = getattr(self.module, name, None) + if item is None: + return if isinstance(item, types.TypeType): self.print_class(name) elif isinstance(item, types.FunctionType): self.print_method(name) else: - raise RuntimeError("Unsupported item {0}".format(name)) + pass def print_class(self, name): + self._print_ref_(name) self._print_header_(name, dot='-', is_title=False) - self.stream.write('''.. autoclass:: paddle.fluid.{0}.{1} + self.stream.write('''.. autoclass:: paddle.{0}.{1} :members: :noindex: '''.format(self.module_name, name)) def print_method(self, name): + self._print_ref_(name) self._print_header_(name, dot='-', is_title=False) - self.stream.write('''.. autofunction:: paddle.fluid.{0}.{1} + self.stream.write('''.. autofunction:: paddle.{0}.{1} :noindex: '''.format(self.module_name, name)) @@ -94,6 +106,10 @@ class DocGenerator(object): self.stream.write('\n') self.stream.write('\n') + def _print_ref_(self, name): + self.stream.write(".. _api_{0}_{1}:\n\n".format("_".join( + self.module_name.split(".")), name)) + def main(): args = parse_arg() diff --git a/doc/fluid/api/gen_doc.sh b/doc/fluid/api/gen_doc.sh index 27f2419c06..b14ee29873 100755 --- a/doc/fluid/api/gen_doc.sh +++ b/doc/fluid/api/gen_doc.sh @@ -1,7 +1,9 @@ #!/bin/bash -python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler > layers.rst +python gen_doc.py layers --submodules control_flow device io nn ops tensor learning_rate_scheduler detection metric_op tensor > layers.rst -for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer +for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler recordio_writer backward average profiler do python gen_doc.py ${module} > ${module}.rst done + +python gen_doc.py "" > fluid.rst diff --git a/doc/fluid/api/index_en.rst b/doc/fluid/api/index_en.rst index 29cea9c682..359406819a 100644 --- a/doc/fluid/api/index_en.rst +++ b/doc/fluid/api/index_en.rst @@ -1,10 +1,11 @@ -====================== -Fluid -====================== +============= +API Reference +============= .. toctree:: :maxdepth: 1 + fluid.rst layers.rst data_feeder.rst executor.rst @@ -18,3 +19,8 @@ Fluid regularizer.rst io.rst data.rst + transpiler.rst + recordio_writer.rst + backward.rst + average.rst + profiler.rst diff --git a/doc/fluid/api/initializer.rst b/doc/fluid/api/initializer.rst index c49a98c744..dc0b52b14f 100644 --- a/doc/fluid/api/initializer.rst +++ b/doc/fluid/api/initializer.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -=========== -initializer -=========== +================= +fluid.initializer +================= + +.. _api_fluid_initializer_Constant: Constant -------- @@ -12,6 +14,8 @@ Constant :members: :noindex: +.. _api_fluid_initializer_Uniform: + Uniform ------- @@ -19,6 +23,8 @@ Uniform :members: :noindex: +.. _api_fluid_initializer_Normal: + Normal ------ @@ -26,6 +32,8 @@ Normal :members: :noindex: +.. _api_fluid_initializer_Xavier: + Xavier ------ @@ -33,18 +41,42 @@ Xavier :members: :noindex: +.. _api_fluid_initializer_Bilinear: + +Bilinear +-------- + +.. autoclass:: paddle.fluid.initializer.Bilinear + :members: + :noindex: + +.. _api_fluid_initializer_MSRA: + +MSRA +---- + +.. autoclass:: paddle.fluid.initializer.MSRA + :members: + :noindex: + +.. _api_fluid_initializer_force_init_on_cpu: + force_init_on_cpu ----------------- .. autofunction:: paddle.fluid.initializer.force_init_on_cpu :noindex: +.. _api_fluid_initializer_init_on_cpu: + init_on_cpu ----------- .. autofunction:: paddle.fluid.initializer.init_on_cpu :noindex: +.. _api_fluid_initializer_ConstantInitializer: + ConstantInitializer ------------------- @@ -52,6 +84,8 @@ ConstantInitializer :members: :noindex: +.. _api_fluid_initializer_UniformInitializer: + UniformInitializer ------------------ @@ -59,6 +93,8 @@ UniformInitializer :members: :noindex: +.. _api_fluid_initializer_NormalInitializer: + NormalInitializer ----------------- @@ -66,6 +102,8 @@ NormalInitializer :members: :noindex: +.. _api_fluid_initializer_XavierInitializer: + XavierInitializer ----------------- @@ -73,3 +111,21 @@ XavierInitializer :members: :noindex: +.. _api_fluid_initializer_BilinearInitializer: + +BilinearInitializer +------------------- + +.. autoclass:: paddle.fluid.initializer.BilinearInitializer + :members: + :noindex: + +.. _api_fluid_initializer_MSRAInitializer: + +MSRAInitializer +--------------- + +.. autoclass:: paddle.fluid.initializer.MSRAInitializer + :members: + :noindex: + diff --git a/doc/fluid/api/io.rst b/doc/fluid/api/io.rst index dd9d88b669..7cee0bc4d9 100644 --- a/doc/fluid/api/io.rst +++ b/doc/fluid/api/io.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -== -io -== +======== +fluid.io +======== + +.. _api_fluid_io_save_vars: save_vars --------- @@ -11,51 +13,115 @@ save_vars .. autofunction:: paddle.fluid.io.save_vars :noindex: +.. _api_fluid_io_save_params: + save_params ----------- .. autofunction:: paddle.fluid.io.save_params :noindex: +.. _api_fluid_io_save_persistables: + save_persistables ----------------- .. autofunction:: paddle.fluid.io.save_persistables :noindex: +.. _api_fluid_io_load_vars: + load_vars --------- .. autofunction:: paddle.fluid.io.load_vars :noindex: +.. _api_fluid_io_load_params: + load_params ----------- .. autofunction:: paddle.fluid.io.load_params :noindex: +.. _api_fluid_io_load_persistables: + load_persistables ----------------- .. autofunction:: paddle.fluid.io.load_persistables :noindex: +.. _api_fluid_io_save_inference_model: + save_inference_model -------------------- .. autofunction:: paddle.fluid.io.save_inference_model :noindex: +.. _api_fluid_io_load_inference_model: + load_inference_model -------------------- .. autofunction:: paddle.fluid.io.load_inference_model :noindex: +.. _api_fluid_io_get_inference_program: + get_inference_program --------------------- .. autofunction:: paddle.fluid.io.get_inference_program :noindex: +.. _api_fluid_io_save_checkpoint: + +save_checkpoint +--------------- + +.. autofunction:: paddle.fluid.io.save_checkpoint + :noindex: + +.. _api_fluid_io_load_checkpoint: + +load_checkpoint +--------------- + +.. autofunction:: paddle.fluid.io.load_checkpoint + :noindex: + +.. _api_fluid_io_clean_checkpoint: + +clean_checkpoint +---------------- + +.. autofunction:: paddle.fluid.io.clean_checkpoint + :noindex: + +.. _api_fluid_io_load_persist_vars_without_grad: + +load_persist_vars_without_grad +------------------------------ + +.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad + :noindex: + +.. _api_fluid_io_save_persist_vars_without_grad: + +save_persist_vars_without_grad +------------------------------ + +.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad + :noindex: + +.. _api_fluid_io_get_latest_checkpoint_serial: + +get_latest_checkpoint_serial +---------------------------- + +.. autofunction:: paddle.fluid.io.get_latest_checkpoint_serial + :noindex: + diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst index 8d1c9247b1..d443c49657 100644 --- a/doc/fluid/api/layers.rst +++ b/doc/fluid/api/layers.rst @@ -1,25 +1,31 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -====== -layers -====== +============ +fluid.layers +============ control_flow ============ +.. _api_fluid_layers_split_lod_tensor: + split_lod_tensor ---------------- .. autofunction:: paddle.fluid.layers.split_lod_tensor :noindex: +.. _api_fluid_layers_merge_lod_tensor: + merge_lod_tensor ---------------- .. autofunction:: paddle.fluid.layers.merge_lod_tensor :noindex: +.. _api_fluid_layers_BlockGuard: + BlockGuard ---------- @@ -27,6 +33,8 @@ BlockGuard :members: :noindex: +.. _api_fluid_layers_BlockGuardWithCompletion: + BlockGuardWithCompletion ------------------------ @@ -34,12 +42,7 @@ BlockGuardWithCompletion :members: :noindex: -StaticRNNMemoryLink -------------------- - -.. autoclass:: paddle.fluid.layers.StaticRNNMemoryLink - :members: - :noindex: +.. _api_fluid_layers_WhileGuard: WhileGuard ---------- @@ -48,6 +51,8 @@ WhileGuard :members: :noindex: +.. _api_fluid_layers_While: + While ----- @@ -55,6 +60,8 @@ While :members: :noindex: +.. _api_fluid_layers_Switch: + Switch ------ @@ -62,78 +69,104 @@ Switch :members: :noindex: +.. _api_fluid_layers_lod_rank_table: + lod_rank_table -------------- .. autofunction:: paddle.fluid.layers.lod_rank_table :noindex: +.. _api_fluid_layers_max_sequence_len: + max_sequence_len ---------------- .. autofunction:: paddle.fluid.layers.max_sequence_len :noindex: +.. _api_fluid_layers_lod_tensor_to_array: + lod_tensor_to_array ------------------- .. autofunction:: paddle.fluid.layers.lod_tensor_to_array :noindex: +.. _api_fluid_layers_array_to_lod_tensor: + array_to_lod_tensor ------------------- .. autofunction:: paddle.fluid.layers.array_to_lod_tensor :noindex: +.. _api_fluid_layers_increment: + increment --------- .. autofunction:: paddle.fluid.layers.increment :noindex: +.. _api_fluid_layers_array_write: + array_write ----------- .. autofunction:: paddle.fluid.layers.array_write :noindex: +.. _api_fluid_layers_create_array: + create_array ------------ .. autofunction:: paddle.fluid.layers.create_array :noindex: +.. _api_fluid_layers_less_than: + less_than --------- .. autofunction:: paddle.fluid.layers.less_than :noindex: +.. _api_fluid_layers_equal: + equal ----- .. autofunction:: paddle.fluid.layers.equal :noindex: +.. _api_fluid_layers_array_read: + array_read ---------- .. autofunction:: paddle.fluid.layers.array_read :noindex: +.. _api_fluid_layers_shrink_memory: + shrink_memory ------------- .. autofunction:: paddle.fluid.layers.shrink_memory :noindex: +.. _api_fluid_layers_array_length: + array_length ------------ .. autofunction:: paddle.fluid.layers.array_length :noindex: +.. _api_fluid_layers_IfElse: + IfElse ------ @@ -141,6 +174,8 @@ IfElse :members: :noindex: +.. _api_fluid_layers_DynamicRNN: + DynamicRNN ---------- @@ -148,6 +183,8 @@ DynamicRNN :members: :noindex: +.. _api_fluid_layers_ConditionalBlock: + ConditionalBlock ---------------- @@ -155,6 +192,8 @@ ConditionalBlock :members: :noindex: +.. _api_fluid_layers_StaticRNN: + StaticRNN --------- @@ -162,12 +201,16 @@ StaticRNN :members: :noindex: +.. _api_fluid_layers_reorder_lod_tensor_by_rank: + reorder_lod_tensor_by_rank -------------------------- .. autofunction:: paddle.fluid.layers.reorder_lod_tensor_by_rank :noindex: +.. _api_fluid_layers_ParallelDo: + ParallelDo ---------- @@ -175,15 +218,27 @@ ParallelDo :members: :noindex: +.. _api_fluid_layers_Print: + Print ----- .. autofunction:: paddle.fluid.layers.Print :noindex: +.. _api_fluid_layers_is_empty: + +is_empty +-------- + +.. autofunction:: paddle.fluid.layers.is_empty + :noindex: + device ====== +.. _api_fluid_layers_get_places: + get_places ---------- @@ -193,12 +248,16 @@ get_places io == +.. _api_fluid_layers_data: + data ---- .. autofunction:: paddle.fluid.layers.data :noindex: +.. _api_fluid_layers_BlockGuardServ: + BlockGuardServ -------------- @@ -206,6 +265,8 @@ BlockGuardServ :members: :noindex: +.. _api_fluid_layers_ListenAndServ: + ListenAndServ ------------- @@ -213,195 +274,291 @@ ListenAndServ :members: :noindex: +.. _api_fluid_layers_Send: + Send ---- .. autofunction:: paddle.fluid.layers.Send :noindex: +.. _api_fluid_layers_Recv: + +Recv +---- + +.. autofunction:: paddle.fluid.layers.Recv + :noindex: + +.. _api_fluid_layers_open_recordio_file: + open_recordio_file ------------------ .. autofunction:: paddle.fluid.layers.open_recordio_file :noindex: +.. _api_fluid_layers_open_files: + open_files ---------- .. autofunction:: paddle.fluid.layers.open_files :noindex: +.. _api_fluid_layers_read_file: + read_file --------- .. autofunction:: paddle.fluid.layers.read_file :noindex: +.. _api_fluid_layers_shuffle: + shuffle ------- .. autofunction:: paddle.fluid.layers.shuffle :noindex: +.. _api_fluid_layers_batch: + batch ----- .. autofunction:: paddle.fluid.layers.batch :noindex: +.. _api_fluid_layers_double_buffer: + double_buffer ------------- .. autofunction:: paddle.fluid.layers.double_buffer :noindex: +.. _api_fluid_layers_random_data_generator: + +random_data_generator +--------------------- + +.. autofunction:: paddle.fluid.layers.random_data_generator + :noindex: + +.. _api_fluid_layers_Preprocessor: + +Preprocessor +------------ + +.. autoclass:: paddle.fluid.layers.Preprocessor + :members: + :noindex: + +.. _api_fluid_layers_load: + +load +---- + +.. autofunction:: paddle.fluid.layers.load + :noindex: + nn == +.. _api_fluid_layers_fc: + fc -- .. autofunction:: paddle.fluid.layers.fc :noindex: +.. _api_fluid_layers_embedding: + embedding --------- .. autofunction:: paddle.fluid.layers.embedding :noindex: +.. _api_fluid_layers_dynamic_lstm: + dynamic_lstm ------------ .. autofunction:: paddle.fluid.layers.dynamic_lstm :noindex: +.. _api_fluid_layers_dynamic_lstmp: + dynamic_lstmp ------------- .. autofunction:: paddle.fluid.layers.dynamic_lstmp :noindex: +.. _api_fluid_layers_dynamic_gru: + dynamic_gru ----------- .. autofunction:: paddle.fluid.layers.dynamic_gru :noindex: +.. _api_fluid_layers_gru_unit: + gru_unit -------- .. autofunction:: paddle.fluid.layers.gru_unit :noindex: +.. _api_fluid_layers_linear_chain_crf: + linear_chain_crf ---------------- .. autofunction:: paddle.fluid.layers.linear_chain_crf :noindex: +.. _api_fluid_layers_crf_decoding: + crf_decoding ------------ .. autofunction:: paddle.fluid.layers.crf_decoding :noindex: +.. _api_fluid_layers_cos_sim: + cos_sim ------- .. autofunction:: paddle.fluid.layers.cos_sim :noindex: +.. _api_fluid_layers_cross_entropy: + cross_entropy ------------- .. autofunction:: paddle.fluid.layers.cross_entropy :noindex: +.. _api_fluid_layers_square_error_cost: + square_error_cost ----------------- .. autofunction:: paddle.fluid.layers.square_error_cost :noindex: +.. _api_fluid_layers_chunk_eval: + chunk_eval ---------- .. autofunction:: paddle.fluid.layers.chunk_eval :noindex: +.. _api_fluid_layers_sequence_conv: + sequence_conv ------------- .. autofunction:: paddle.fluid.layers.sequence_conv :noindex: +.. _api_fluid_layers_conv2d: + conv2d ------ .. autofunction:: paddle.fluid.layers.conv2d :noindex: +.. _api_fluid_layers_conv3d: + conv3d ------ .. autofunction:: paddle.fluid.layers.conv3d :noindex: +.. _api_fluid_layers_sequence_pool: + sequence_pool ------------- .. autofunction:: paddle.fluid.layers.sequence_pool :noindex: +.. _api_fluid_layers_sequence_softmax: + sequence_softmax ---------------- .. autofunction:: paddle.fluid.layers.sequence_softmax :noindex: +.. _api_fluid_layers_softmax: + softmax ------- .. autofunction:: paddle.fluid.layers.softmax :noindex: +.. _api_fluid_layers_pool2d: + pool2d ------ .. autofunction:: paddle.fluid.layers.pool2d :noindex: +.. _api_fluid_layers_pool3d: + pool3d ------ .. autofunction:: paddle.fluid.layers.pool3d :noindex: +.. _api_fluid_layers_batch_norm: + batch_norm ---------- .. autofunction:: paddle.fluid.layers.batch_norm :noindex: +.. _api_fluid_layers_beam_search_decode: + beam_search_decode ------------------ .. autofunction:: paddle.fluid.layers.beam_search_decode :noindex: +.. _api_fluid_layers_conv2d_transpose: + conv2d_transpose ---------------- .. autofunction:: paddle.fluid.layers.conv2d_transpose :noindex: +.. _api_fluid_layers_conv3d_transpose: + conv3d_transpose ---------------- -.. autofunction:: paddle.fluid.layers.conv2d_transpose +.. autofunction:: paddle.fluid.layers.conv3d_transpose :noindex: +.. _api_fluid_layers_sequence_expand: sequence_expand --------------- @@ -409,320 +566,498 @@ sequence_expand .. autofunction:: paddle.fluid.layers.sequence_expand :noindex: +.. _api_fluid_layers_lstm_unit: + lstm_unit --------- .. autofunction:: paddle.fluid.layers.lstm_unit :noindex: +.. _api_fluid_layers_reduce_sum: + reduce_sum ---------- .. autofunction:: paddle.fluid.layers.reduce_sum :noindex: +.. _api_fluid_layers_reduce_mean: + reduce_mean ----------- .. autofunction:: paddle.fluid.layers.reduce_mean :noindex: +.. _api_fluid_layers_reduce_max: + reduce_max ---------- .. autofunction:: paddle.fluid.layers.reduce_max :noindex: +.. _api_fluid_layers_reduce_min: + reduce_min ---------- .. autofunction:: paddle.fluid.layers.reduce_min :noindex: +.. _api_fluid_layers_reduce_prod: + reduce_prod ----------- .. autofunction:: paddle.fluid.layers.reduce_prod :noindex: +.. _api_fluid_layers_sequence_first_step: + sequence_first_step ------------------- .. autofunction:: paddle.fluid.layers.sequence_first_step :noindex: +.. _api_fluid_layers_sequence_last_step: + sequence_last_step ------------------ .. autofunction:: paddle.fluid.layers.sequence_last_step :noindex: +.. _api_fluid_layers_dropout: + dropout ------- .. autofunction:: paddle.fluid.layers.dropout :noindex: +.. _api_fluid_layers_split: + split ----- .. autofunction:: paddle.fluid.layers.split :noindex: +.. _api_fluid_layers_ctc_greedy_decoder: + ctc_greedy_decoder ------------------ .. autofunction:: paddle.fluid.layers.ctc_greedy_decoder :noindex: +.. _api_fluid_layers_edit_distance: + edit_distance ------------- .. autofunction:: paddle.fluid.layers.edit_distance :noindex: +.. _api_fluid_layers_l2_normalize: + l2_normalize ------------ .. autofunction:: paddle.fluid.layers.l2_normalize :noindex: +.. _api_fluid_layers_matmul: + matmul ------ .. autofunction:: paddle.fluid.layers.matmul :noindex: +.. _api_fluid_layers_topk: + topk ---- .. autofunction:: paddle.fluid.layers.topk :noindex: +.. _api_fluid_layers_warpctc: + warpctc ------- .. autofunction:: paddle.fluid.layers.warpctc :noindex: +.. _api_fluid_layers_sequence_reshape: + sequence_reshape ---------------- .. autofunction:: paddle.fluid.layers.sequence_reshape :noindex: +.. _api_fluid_layers_transpose: + transpose --------- .. autofunction:: paddle.fluid.layers.transpose :noindex: +.. _api_fluid_layers_im2sequence: + im2sequence ----------- .. autofunction:: paddle.fluid.layers.im2sequence :noindex: +.. _api_fluid_layers_nce: + nce --- .. autofunction:: paddle.fluid.layers.nce :noindex: +.. _api_fluid_layers_beam_search: + beam_search ----------- .. autofunction:: paddle.fluid.layers.beam_search :noindex: +.. _api_fluid_layers_row_conv: + row_conv -------- .. autofunction:: paddle.fluid.layers.row_conv :noindex: +.. _api_fluid_layers_multiplex: + multiplex --------- .. autofunction:: paddle.fluid.layers.multiplex :noindex: +.. _api_fluid_layers_layer_norm: + layer_norm ---------- .. autofunction:: paddle.fluid.layers.layer_norm :noindex: +.. _api_fluid_layers_softmax_with_cross_entropy: + softmax_with_cross_entropy -------------------------- .. autofunction:: paddle.fluid.layers.softmax_with_cross_entropy :noindex: +.. _api_fluid_layers_smooth_l1: + smooth_l1 --------- .. autofunction:: paddle.fluid.layers.smooth_l1 :noindex: +.. _api_fluid_layers_one_hot: + one_hot ------- .. autofunction:: paddle.fluid.layers.one_hot :noindex: +.. _api_fluid_layers_autoincreased_step_counter: + autoincreased_step_counter -------------------------- .. autofunction:: paddle.fluid.layers.autoincreased_step_counter :noindex: +.. _api_fluid_layers_reshape: + reshape ------- .. autofunction:: paddle.fluid.layers.reshape :noindex: +.. _api_fluid_layers_lod_reset: + lod_reset --------- .. autofunction:: paddle.fluid.layers.lod_reset :noindex: +.. _api_fluid_layers_lrn: + lrn --- .. autofunction:: paddle.fluid.layers.lrn :noindex: +.. _api_fluid_layers_pad: + pad --- .. autofunction:: paddle.fluid.layers.pad :noindex: +.. _api_fluid_layers_label_smooth: + label_smooth ------------ .. autofunction:: paddle.fluid.layers.label_smooth :noindex: +.. _api_fluid_layers_roi_pool: + roi_pool -------- .. autofunction:: paddle.fluid.layers.roi_pool :noindex: +.. _api_fluid_layers_dice_loss: + +dice_loss +--------- + +.. autofunction:: paddle.fluid.layers.dice_loss + :noindex: + +.. _api_fluid_layers_image_resize: + +image_resize +------------ + +.. autofunction:: paddle.fluid.layers.image_resize + :noindex: + +.. _api_fluid_layers_image_resize_short: + +image_resize_short +------------------ + +.. autofunction:: paddle.fluid.layers.image_resize_short + :noindex: + +.. _api_fluid_layers_resize_bilinear: + +resize_bilinear +--------------- + +.. autofunction:: paddle.fluid.layers.resize_bilinear + :noindex: + +.. _api_fluid_layers_gather: + +gather +------ + +.. autofunction:: paddle.fluid.layers.gather + :noindex: + +.. _api_fluid_layers_random_crop: + +random_crop +----------- + +.. autofunction:: paddle.fluid.layers.random_crop + :noindex: + +.. _api_fluid_layers_mean_iou: + +mean_iou +-------- + +.. autofunction:: paddle.fluid.layers.mean_iou + :noindex: + +.. _api_fluid_layers_relu: + +relu +---- + +.. autofunction:: paddle.fluid.layers.relu + :noindex: + +.. _api_fluid_layers_log: + +log +--- + +.. autofunction:: paddle.fluid.layers.log + :noindex: + +.. _api_fluid_layers_crop: + +crop +---- + +.. autofunction:: paddle.fluid.layers.crop + :noindex: + ops === +.. _api_fluid_layers_mean: + mean ---- .. autofunction:: paddle.fluid.layers.mean :noindex: +.. _api_fluid_layers_mul: + mul --- .. autofunction:: paddle.fluid.layers.mul :noindex: +.. _api_fluid_layers_scale: + scale ----- .. autofunction:: paddle.fluid.layers.scale :noindex: +.. _api_fluid_layers_sigmoid_cross_entropy_with_logits: + sigmoid_cross_entropy_with_logits --------------------------------- .. autofunction:: paddle.fluid.layers.sigmoid_cross_entropy_with_logits :noindex: +.. _api_fluid_layers_elementwise_add: + elementwise_add --------------- .. autofunction:: paddle.fluid.layers.elementwise_add :noindex: +.. _api_fluid_layers_elementwise_div: + elementwise_div --------------- .. autofunction:: paddle.fluid.layers.elementwise_div :noindex: +.. _api_fluid_layers_elementwise_sub: + elementwise_sub --------------- .. autofunction:: paddle.fluid.layers.elementwise_sub :noindex: +.. _api_fluid_layers_elementwise_mul: + elementwise_mul --------------- .. autofunction:: paddle.fluid.layers.elementwise_mul :noindex: +.. _api_fluid_layers_elementwise_max: + elementwise_max --------------- .. autofunction:: paddle.fluid.layers.elementwise_max :noindex: +.. _api_fluid_layers_elementwise_min: + elementwise_min --------------- .. autofunction:: paddle.fluid.layers.elementwise_min :noindex: +.. _api_fluid_layers_elementwise_pow: + elementwise_pow --------------- .. autofunction:: paddle.fluid.layers.elementwise_pow :noindex: +.. _api_fluid_layers_clip: + clip ---- .. autofunction:: paddle.fluid.layers.clip :noindex: +.. _api_fluid_layers_clip_by_norm: + clip_by_norm ------------ .. autofunction:: paddle.fluid.layers.clip_by_norm :noindex: +.. _api_fluid_layers_logical_and: + logical_and ----------- .. autofunction:: paddle.fluid.layers.logical_and :noindex: +.. _api_fluid_layers_logical_or: + logical_or ---------- .. autofunction:: paddle.fluid.layers.logical_or :noindex: +.. _api_fluid_layers_logical_xor: + logical_xor ----------- .. autofunction:: paddle.fluid.layers.logical_xor :noindex: +.. _api_fluid_layers_logical_not: + logical_not ----------- .. autofunction:: paddle.fluid.layers.logical_not :noindex: -uniform_random --------------- - -.. autofunction:: paddle.fluid.layers.uniform_random - :noindex: +.. _api_fluid_layers_uniform_random_batch_size_like: uniform_random_batch_size_like ------------------------------ @@ -730,23 +1065,23 @@ uniform_random_batch_size_like .. autofunction:: paddle.fluid.layers.uniform_random_batch_size_like :noindex: +.. _api_fluid_layers_gaussian_random: + gaussian_random --------------- .. autofunction:: paddle.fluid.layers.gaussian_random :noindex: +.. _api_fluid_layers_gaussian_random_batch_size_like: + gaussian_random_batch_size_like ------------------------------- .. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like :noindex: -cumsum ------- - -.. autofunction:: paddle.fluid.layers.cumsum - :noindex: +.. _api_fluid_layers_scatter: scatter ------- @@ -754,35 +1089,79 @@ scatter .. autofunction:: paddle.fluid.layers.scatter :noindex: +.. _api_fluid_layers_sum: + sum --- .. autofunction:: paddle.fluid.layers.sum :noindex: +.. _api_fluid_layers_slice: + +slice +----- + +.. autofunction:: paddle.fluid.layers.slice + :noindex: + +.. _api_fluid_layers_polygon_box_transform: + +polygon_box_transform +--------------------- + +.. autofunction:: paddle.fluid.layers.polygon_box_transform + :noindex: + +.. _api_fluid_layers_shape: + +shape +----- + +.. autofunction:: paddle.fluid.layers.shape + :noindex: + +.. _api_fluid_layers_iou_similarity: + +iou_similarity +-------------- + +.. autofunction:: paddle.fluid.layers.iou_similarity + :noindex: + +.. _api_fluid_layers_maxout: + +maxout +------ + +.. autofunction:: paddle.fluid.layers.maxout + :noindex: + +.. _api_fluid_layers_sigmoid: + sigmoid ------- .. autofunction:: paddle.fluid.layers.sigmoid :noindex: +.. _api_fluid_layers_logsigmoid: + logsigmoid ---------- .. autofunction:: paddle.fluid.layers.logsigmoid :noindex: +.. _api_fluid_layers_exp: + exp --- .. autofunction:: paddle.fluid.layers.exp :noindex: -relu ----- - -.. autofunction:: paddle.fluid.layers.relu - :noindex: +.. _api_fluid_layers_tanh: tanh ---- @@ -790,71 +1169,87 @@ tanh .. autofunction:: paddle.fluid.layers.tanh :noindex: +.. _api_fluid_layers_tanh_shrink: + tanh_shrink ----------- .. autofunction:: paddle.fluid.layers.tanh_shrink :noindex: +.. _api_fluid_layers_softshrink: + softshrink ---------- .. autofunction:: paddle.fluid.layers.softshrink :noindex: +.. _api_fluid_layers_sqrt: + sqrt ---- .. autofunction:: paddle.fluid.layers.sqrt :noindex: +.. _api_fluid_layers_abs: + abs --- .. autofunction:: paddle.fluid.layers.abs :noindex: +.. _api_fluid_layers_ceil: + ceil ---- .. autofunction:: paddle.fluid.layers.ceil :noindex: +.. _api_fluid_layers_floor: + floor ----- .. autofunction:: paddle.fluid.layers.floor :noindex: +.. _api_fluid_layers_cos: + cos --- .. autofunction:: paddle.fluid.layers.cos :noindex: +.. _api_fluid_layers_sin: + sin --- .. autofunction:: paddle.fluid.layers.sin :noindex: +.. _api_fluid_layers_round: + round ----- .. autofunction:: paddle.fluid.layers.round :noindex: +.. _api_fluid_layers_reciprocal: + reciprocal ---------- .. autofunction:: paddle.fluid.layers.reciprocal :noindex: -log ---- - -.. autofunction:: paddle.fluid.layers.log - :noindex: +.. _api_fluid_layers_square: square ------ @@ -862,71 +1257,79 @@ square .. autofunction:: paddle.fluid.layers.square :noindex: +.. _api_fluid_layers_softplus: + softplus -------- .. autofunction:: paddle.fluid.layers.softplus :noindex: +.. _api_fluid_layers_softsign: + softsign -------- .. autofunction:: paddle.fluid.layers.softsign :noindex: +.. _api_fluid_layers_brelu: + brelu ----- .. autofunction:: paddle.fluid.layers.brelu :noindex: +.. _api_fluid_layers_leaky_relu: + leaky_relu ---------- .. autofunction:: paddle.fluid.layers.leaky_relu :noindex: +.. _api_fluid_layers_soft_relu: + soft_relu --------- .. autofunction:: paddle.fluid.layers.soft_relu :noindex: +.. _api_fluid_layers_elu: + elu --- .. autofunction:: paddle.fluid.layers.elu :noindex: +.. _api_fluid_layers_relu6: + relu6 ----- .. autofunction:: paddle.fluid.layers.relu6 :noindex: +.. _api_fluid_layers_pow: + pow --- .. autofunction:: paddle.fluid.layers.pow :noindex: +.. _api_fluid_layers_stanh: + stanh ----- .. autofunction:: paddle.fluid.layers.stanh :noindex: -hard_shrink ------------ - -.. autofunction:: paddle.fluid.layers.hard_shrink - :noindex: - -thresholded_relu ----------------- - -.. autofunction:: paddle.fluid.layers.thresholded_relu - :noindex: +.. _api_fluid_layers_hard_sigmoid: hard_sigmoid ------------ @@ -934,168 +1337,434 @@ hard_sigmoid .. autofunction:: paddle.fluid.layers.hard_sigmoid :noindex: +.. _api_fluid_layers_swish: + swish ----- .. autofunction:: paddle.fluid.layers.swish :noindex: +.. _api_fluid_layers_uniform_random: + +uniform_random +-------------- + +.. autofunction:: paddle.fluid.layers.uniform_random + :noindex: + +.. _api_fluid_layers_hard_shrink: + +hard_shrink +----------- + +.. autofunction:: paddle.fluid.layers.hard_shrink + :noindex: + +.. _api_fluid_layers_cumsum: + +cumsum +------ + +.. autofunction:: paddle.fluid.layers.cumsum + :noindex: + +.. _api_fluid_layers_thresholded_relu: + +thresholded_relu +---------------- + +.. autofunction:: paddle.fluid.layers.thresholded_relu + :noindex: + tensor ====== +.. _api_fluid_layers_create_tensor: + create_tensor ------------- .. autofunction:: paddle.fluid.layers.create_tensor :noindex: +.. _api_fluid_layers_create_parameter: + create_parameter ---------------- .. autofunction:: paddle.fluid.layers.create_parameter :noindex: +.. _api_fluid_layers_create_global_var: + create_global_var ----------------- .. autofunction:: paddle.fluid.layers.create_global_var :noindex: +.. _api_fluid_layers_cast: + cast ---- .. autofunction:: paddle.fluid.layers.cast :noindex: +.. _api_fluid_layers_concat: + concat ------ .. autofunction:: paddle.fluid.layers.concat :noindex: +.. _api_fluid_layers_sums: + sums ---- .. autofunction:: paddle.fluid.layers.sums :noindex: +.. _api_fluid_layers_assign: + assign ------ .. autofunction:: paddle.fluid.layers.assign :noindex: +.. _api_fluid_layers_fill_constant_batch_size_like: + fill_constant_batch_size_like ----------------------------- .. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like :noindex: +.. _api_fluid_layers_fill_constant: + fill_constant ------------- .. autofunction:: paddle.fluid.layers.fill_constant :noindex: +.. _api_fluid_layers_argmin: + +argmin +------ + +.. autofunction:: paddle.fluid.layers.argmin + :noindex: + +.. _api_fluid_layers_argmax: + +argmax +------ + +.. autofunction:: paddle.fluid.layers.argmax + :noindex: + +.. _api_fluid_layers_argsort: + +argsort +------- + +.. autofunction:: paddle.fluid.layers.argsort + :noindex: + +.. _api_fluid_layers_ones: + ones ---- .. autofunction:: paddle.fluid.layers.ones :noindex: +.. _api_fluid_layers_zeros: + zeros ----- .. autofunction:: paddle.fluid.layers.zeros :noindex: +.. _api_fluid_layers_reverse: + +reverse +------- + +.. autofunction:: paddle.fluid.layers.reverse + :noindex: + +learning_rate_scheduler +======================= + +.. _api_fluid_layers_exponential_decay: + +exponential_decay +----------------- + +.. autofunction:: paddle.fluid.layers.exponential_decay + :noindex: + +.. _api_fluid_layers_natural_exp_decay: + +natural_exp_decay +----------------- + +.. autofunction:: paddle.fluid.layers.natural_exp_decay + :noindex: + +.. _api_fluid_layers_inverse_time_decay: + +inverse_time_decay +------------------ + +.. autofunction:: paddle.fluid.layers.inverse_time_decay + :noindex: + +.. _api_fluid_layers_polynomial_decay: + +polynomial_decay +---------------- + +.. autofunction:: paddle.fluid.layers.polynomial_decay + :noindex: + +.. _api_fluid_layers_piecewise_decay: + +piecewise_decay +--------------- + +.. autofunction:: paddle.fluid.layers.piecewise_decay + :noindex: + +.. _api_fluid_layers_noam_decay: + +noam_decay +---------- + +.. autofunction:: paddle.fluid.layers.noam_decay + :noindex: + +.. _api_fluid_layers_append_LARS: + +append_LARS +----------- + +.. autofunction:: paddle.fluid.layers.append_LARS + :noindex: + detection ========= +.. _api_fluid_layers_prior_box: + +prior_box +--------- + +.. autofunction:: paddle.fluid.layers.prior_box + :noindex: + +.. _api_fluid_layers_multi_box_head: + multi_box_head -------------- .. autofunction:: paddle.fluid.layers.multi_box_head :noindex: +.. _api_fluid_layers_bipartite_match: + bipartite_match --------------- .. autofunction:: paddle.fluid.layers.bipartite_match :noindex: +.. _api_fluid_layers_target_assign: + target_assign ------------- .. autofunction:: paddle.fluid.layers.target_assign :noindex: +.. _api_fluid_layers_detection_output: + detection_output ---------------- .. autofunction:: paddle.fluid.layers.detection_output :noindex: +.. _api_fluid_layers_ssd_loss: + ssd_loss -------- .. autofunction:: paddle.fluid.layers.ssd_loss :noindex: +.. _api_fluid_layers_detection_map: + detection_map ------------- .. autofunction:: paddle.fluid.layers.detection_map :noindex: +.. _api_fluid_layers_iou_similarity: + iou_similarity -------------- .. autofunction:: paddle.fluid.layers.iou_similarity :noindex: +.. _api_fluid_layers_box_coder: + box_coder --------- .. autofunction:: paddle.fluid.layers.box_coder :noindex: -learning_rate_scheduler -======================= +metric_op +========= -exponential_decay ------------------ +.. _api_fluid_layers_accuracy: -.. autofunction:: paddle.fluid.layers.exponential_decay +accuracy +-------- + +.. autofunction:: paddle.fluid.layers.accuracy :noindex: -natural_exp_decay ------------------ +.. _api_fluid_layers_auc: -.. autofunction:: paddle.fluid.layers.natural_exp_decay +auc +--- + +.. autofunction:: paddle.fluid.layers.auc :noindex: -inverse_time_decay ------------------- +tensor +====== -.. autofunction:: paddle.fluid.layers.inverse_time_decay +.. _api_fluid_layers_create_tensor: + +create_tensor +------------- + +.. autofunction:: paddle.fluid.layers.create_tensor :noindex: -polynomial_decay +.. _api_fluid_layers_create_parameter: + +create_parameter ---------------- -.. autofunction:: paddle.fluid.layers.polynomial_decay +.. autofunction:: paddle.fluid.layers.create_parameter :noindex: -piecewise_decay ---------------- +.. _api_fluid_layers_create_global_var: -.. autofunction:: paddle.fluid.layers.piecewise_decay +create_global_var +----------------- + +.. autofunction:: paddle.fluid.layers.create_global_var :noindex: -noam_decay ----------- +.. _api_fluid_layers_cast: -.. autofunction:: paddle.fluid.layers.noam_decay +cast +---- + +.. autofunction:: paddle.fluid.layers.cast + :noindex: + +.. _api_fluid_layers_concat: + +concat +------ + +.. autofunction:: paddle.fluid.layers.concat + :noindex: + +.. _api_fluid_layers_sums: + +sums +---- + +.. autofunction:: paddle.fluid.layers.sums + :noindex: + +.. _api_fluid_layers_assign: + +assign +------ + +.. autofunction:: paddle.fluid.layers.assign + :noindex: + +.. _api_fluid_layers_fill_constant_batch_size_like: + +fill_constant_batch_size_like +----------------------------- + +.. autofunction:: paddle.fluid.layers.fill_constant_batch_size_like + :noindex: + +.. _api_fluid_layers_fill_constant: + +fill_constant +------------- + +.. autofunction:: paddle.fluid.layers.fill_constant + :noindex: + +.. _api_fluid_layers_argmin: + +argmin +------ + +.. autofunction:: paddle.fluid.layers.argmin + :noindex: + +.. _api_fluid_layers_argmax: + +argmax +------ + +.. autofunction:: paddle.fluid.layers.argmax + :noindex: + +.. _api_fluid_layers_ones: + +ones +---- + +.. autofunction:: paddle.fluid.layers.ones + :noindex: + +.. _api_fluid_layers_zeros: + +zeros +----- + +.. autofunction:: paddle.fluid.layers.zeros + :noindex: + +.. _api_fluid_layers_reverse: + +reverse +------- + +.. autofunction:: paddle.fluid.layers.reverse :noindex: diff --git a/doc/fluid/api/metrics.rst b/doc/fluid/api/metrics.rst index ddf07775d7..0f54b2e2eb 100644 --- a/doc/fluid/api/metrics.rst +++ b/doc/fluid/api/metrics.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -======= -metrics -======= +============= +fluid.metrics +============= + +.. _api_fluid_metrics_MetricBase: MetricBase ---------- @@ -12,6 +14,8 @@ MetricBase :members: :noindex: +.. _api_fluid_metrics_CompositeMetric: + CompositeMetric --------------- @@ -19,6 +23,26 @@ CompositeMetric :members: :noindex: +.. _api_fluid_metrics_Precision: + +Precision +--------- + +.. autoclass:: paddle.fluid.metrics.Precision + :members: + :noindex: + +.. _api_fluid_metrics_Recall: + +Recall +------ + +.. autoclass:: paddle.fluid.metrics.Recall + :members: + :noindex: + +.. _api_fluid_metrics_Accuracy: + Accuracy -------- @@ -26,6 +50,8 @@ Accuracy :members: :noindex: +.. _api_fluid_metrics_ChunkEvaluator: + ChunkEvaluator -------------- @@ -33,6 +59,8 @@ ChunkEvaluator :members: :noindex: +.. _api_fluid_metrics_EditDistance: + EditDistance ------------ @@ -40,6 +68,8 @@ EditDistance :members: :noindex: +.. _api_fluid_metrics_DetectionMAP: + DetectionMAP ------------ @@ -47,6 +77,8 @@ DetectionMAP :members: :noindex: +.. _api_fluid_metrics_Auc: + Auc --- diff --git a/doc/fluid/api/nets.rst b/doc/fluid/api/nets.rst index 7ae3187304..059733af18 100644 --- a/doc/fluid/api/nets.rst +++ b/doc/fluid/api/nets.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -==== -nets -==== +========== +fluid.nets +========== + +.. _api_fluid_nets_simple_img_conv_pool: simple_img_conv_pool -------------------- @@ -11,18 +13,24 @@ simple_img_conv_pool .. autofunction:: paddle.fluid.nets.simple_img_conv_pool :noindex: +.. _api_fluid_nets_sequence_conv_pool: + sequence_conv_pool ------------------ .. autofunction:: paddle.fluid.nets.sequence_conv_pool :noindex: +.. _api_fluid_nets_glu: + glu --- .. autofunction:: paddle.fluid.nets.glu :noindex: +.. _api_fluid_nets_scaled_dot_product_attention: + scaled_dot_product_attention ---------------------------- diff --git a/doc/fluid/api/optimizer.rst b/doc/fluid/api/optimizer.rst index 79a0995fce..8d792120f2 100644 --- a/doc/fluid/api/optimizer.rst +++ b/doc/fluid/api/optimizer.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -========= -optimizer -========= +=============== +fluid.optimizer +=============== + +.. _api_fluid_optimizer_SGD: SGD --- @@ -12,6 +14,8 @@ SGD :members: :noindex: +.. _api_fluid_optimizer_Momentum: + Momentum -------- @@ -19,6 +23,8 @@ Momentum :members: :noindex: +.. _api_fluid_optimizer_Adagrad: + Adagrad ------- @@ -26,6 +32,8 @@ Adagrad :members: :noindex: +.. _api_fluid_optimizer_Adam: + Adam ---- @@ -33,6 +41,8 @@ Adam :members: :noindex: +.. _api_fluid_optimizer_Adamax: + Adamax ------ @@ -40,6 +50,8 @@ Adamax :members: :noindex: +.. _api_fluid_optimizer_DecayedAdagrad: + DecayedAdagrad -------------- @@ -47,6 +59,17 @@ DecayedAdagrad :members: :noindex: +.. _api_fluid_optimizer_Ftrl: + +Ftrl +---- + +.. autoclass:: paddle.fluid.optimizer.Ftrl + :members: + :noindex: + +.. _api_fluid_optimizer_SGDOptimizer: + SGDOptimizer ------------ @@ -54,6 +77,8 @@ SGDOptimizer :members: :noindex: +.. _api_fluid_optimizer_MomentumOptimizer: + MomentumOptimizer ----------------- @@ -61,6 +86,8 @@ MomentumOptimizer :members: :noindex: +.. _api_fluid_optimizer_AdagradOptimizer: + AdagradOptimizer ---------------- @@ -68,6 +95,8 @@ AdagradOptimizer :members: :noindex: +.. _api_fluid_optimizer_AdamOptimizer: + AdamOptimizer ------------- @@ -75,6 +104,8 @@ AdamOptimizer :members: :noindex: +.. _api_fluid_optimizer_AdamaxOptimizer: + AdamaxOptimizer --------------- @@ -82,6 +113,8 @@ AdamaxOptimizer :members: :noindex: +.. _api_fluid_optimizer_DecayedAdagradOptimizer: + DecayedAdagradOptimizer ----------------------- @@ -89,6 +122,26 @@ DecayedAdagradOptimizer :members: :noindex: +.. _api_fluid_optimizer_RMSPropOptimizer: + +RMSPropOptimizer +---------------- + +.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer + :members: + :noindex: + +.. _api_fluid_optimizer_FtrlOptimizer: + +FtrlOptimizer +------------- + +.. autoclass:: paddle.fluid.optimizer.FtrlOptimizer + :members: + :noindex: + +.. _api_fluid_optimizer_Adadelta: + Adadelta -------- @@ -96,6 +149,8 @@ Adadelta :members: :noindex: +.. _api_fluid_optimizer_ModelAverage: + ModelAverage ------------ @@ -103,6 +158,8 @@ ModelAverage :members: :noindex: +.. _api_fluid_optimizer_Optimizer: + Optimizer --------- @@ -110,3 +167,12 @@ Optimizer :members: :noindex: +.. _api_fluid_optimizer_RMSPropOptimizer: + +RMSPropOptimizer +---------------- + +.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer + :members: + :noindex: + diff --git a/doc/fluid/api/param_attr.rst b/doc/fluid/api/param_attr.rst index 8e4ddb2b04..33035bbc7c 100644 --- a/doc/fluid/api/param_attr.rst +++ b/doc/fluid/api/param_attr.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -========== -param_attr -========== +================ +fluid.param_attr +================ + +.. _api_fluid_param_attr_ParamAttr: ParamAttr --------- @@ -12,6 +14,8 @@ ParamAttr :members: :noindex: +.. _api_fluid_param_attr_WeightNormParamAttr: + WeightNormParamAttr ------------------- diff --git a/doc/fluid/api/profiler.rst b/doc/fluid/api/profiler.rst index 74d102dcb0..c750a2d588 100644 --- a/doc/fluid/api/profiler.rst +++ b/doc/fluid/api/profiler.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -======== -profiler -======== +============== +fluid.profiler +============== + +.. _api_fluid_profiler_cuda_profiler: cuda_profiler ------------- @@ -11,15 +13,35 @@ cuda_profiler .. autofunction:: paddle.fluid.profiler.cuda_profiler :noindex: +.. _api_fluid_profiler_reset_profiler: + reset_profiler -------------- .. autofunction:: paddle.fluid.profiler.reset_profiler :noindex: +.. _api_fluid_profiler_profiler: + profiler -------- .. autofunction:: paddle.fluid.profiler.profiler :noindex: +.. _api_fluid_profiler_start_profiler: + +start_profiler +-------------- + +.. autofunction:: paddle.fluid.profiler.start_profiler + :noindex: + +.. _api_fluid_profiler_stop_profiler: + +stop_profiler +------------- + +.. autofunction:: paddle.fluid.profiler.stop_profiler + :noindex: + diff --git a/doc/fluid/api/recordio_writer.rst b/doc/fluid/api/recordio_writer.rst new file mode 100644 index 0000000000..f0c12fd115 --- /dev/null +++ b/doc/fluid/api/recordio_writer.rst @@ -0,0 +1,23 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +===================== +fluid.recordio_writer +===================== + +.. _api_fluid_recordio_writer_convert_reader_to_recordio_file: + +convert_reader_to_recordio_file +------------------------------- + +.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_file + :noindex: + +.. _api_fluid_recordio_writer_convert_reader_to_recordio_files: + +convert_reader_to_recordio_files +-------------------------------- + +.. autofunction:: paddle.fluid.recordio_writer.convert_reader_to_recordio_files + :noindex: + diff --git a/doc/fluid/api/regularizer.rst b/doc/fluid/api/regularizer.rst index 756bc53baa..987eaea903 100644 --- a/doc/fluid/api/regularizer.rst +++ b/doc/fluid/api/regularizer.rst @@ -1,9 +1,11 @@ .. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` !DO NOT EDIT THIS FILE MANUALLY! -=========== -regularizer -=========== +================= +fluid.regularizer +================= + +.. _api_fluid_regularizer_append_regularization_ops: append_regularization_ops ------------------------- @@ -11,12 +13,7 @@ append_regularization_ops .. autofunction:: paddle.fluid.regularizer.append_regularization_ops :noindex: -WeightDecayRegularizer ----------------------- - -.. autoclass:: paddle.fluid.regularizer.WeightDecayRegularizer - :members: - :noindex: +.. _api_fluid_regularizer_L1Decay: L1Decay ------- @@ -25,6 +22,8 @@ L1Decay :members: :noindex: +.. _api_fluid_regularizer_L2Decay: + L2Decay ------- @@ -32,6 +31,8 @@ L2Decay :members: :noindex: +.. _api_fluid_regularizer_L1DecayRegularizer: + L1DecayRegularizer ------------------ @@ -39,6 +40,8 @@ L1DecayRegularizer :members: :noindex: +.. _api_fluid_regularizer_L2DecayRegularizer: + L2DecayRegularizer ------------------ diff --git a/doc/fluid/api/transpiler.rst b/doc/fluid/api/transpiler.rst new file mode 100644 index 0000000000..d2ac04f144 --- /dev/null +++ b/doc/fluid/api/transpiler.rst @@ -0,0 +1,59 @@ +.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}` + !DO NOT EDIT THIS FILE MANUALLY! + +================ +fluid.transpiler +================ + +.. _api_fluid_transpiler_DistributeTranspiler: + +DistributeTranspiler +-------------------- + +.. autoclass:: paddle.fluid.transpiler.DistributeTranspiler + :members: + :noindex: + +.. _api_fluid_transpiler_InferenceTranspiler: + +InferenceTranspiler +------------------- + +.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler + :members: + :noindex: + +.. _api_fluid_transpiler_memory_optimize: + +memory_optimize +--------------- + +.. autofunction:: paddle.fluid.transpiler.memory_optimize + :noindex: + +.. _api_fluid_transpiler_release_memory: + +release_memory +-------------- + +.. autofunction:: paddle.fluid.transpiler.release_memory + :noindex: + +.. _api_fluid_transpiler_HashName: + +HashName +-------- + +.. autoclass:: paddle.fluid.transpiler.HashName + :members: + :noindex: + +.. _api_fluid_transpiler_RoundRobin: + +RoundRobin +---------- + +.. autoclass:: paddle.fluid.transpiler.RoundRobin + :members: + :noindex: + diff --git a/doc/fluid/design/concepts/lod_tensor.md b/doc/fluid/design/concepts/lod_tensor.md index d606d7a790..748488f6d5 100644 --- a/doc/fluid/design/concepts/lod_tensor.md +++ b/doc/fluid/design/concepts/lod_tensor.md @@ -173,6 +173,7 @@ are transformed into offsets of elements/words as follows: ## Slicing of LoD Tensors + When we use the above 2-level LoD Tensor as the input to a nested-RNN, we need to retrieve certain sequences. Here we define the sequence identified by branch as the **-slice**. For example, the <2>-slice of above example is @@ -189,3 +190,22 @@ and the <2,0>-slice of above slice is 10 12 || ``` + +## Length Representation vs Offset Representation + +The offset representation is an implementation-oriented decision and it makes understanding the idea behind LoDTensor difficult. +Hence, we encapsulate this implementation detail in C++ and expose the original length representation in our Python API. +Specifically, we call this length representation `recursive_sequence_lengths` and users can use the following code to set or get the `recursive_sequence_lengths` of a LoDTensor in Python: +```Python +# length representation of lod called recursive_sequence_lengths +recursive_seq_lens = [[3, 1, 2], [2, 2, 1, 3, 1, 2]] +# Create a LoDTensor that has the above recursive_sequence_lengths info. +# This recursive_sequence_lengths will be converted to an offset representation of LoD in the C++ implementation under the hood. +tensor = fluid.LoDTensor(lod) + +# Set/Change the recursive_sequence_lengths info of LoDTensor +tensor.set_recursive_sequence_lengths([[3, 1, 2]]) +# Get the recursive_sequence_lengths info of a LoDTensor (the offset-based LoD representation stored in C++ will be converted +# back to length-based recursive_sequence_lengths), new_recursive_seq_lens = [[3, 1, 2]] +new_recursive_seq_lens = tensor.recursive_sequence_lengths() +``` diff --git a/doc/fluid/design/concepts/python_data_feeding.md b/doc/fluid/design/concepts/python_data_feeding.md new file mode 100644 index 0000000000..dffee8e02b --- /dev/null +++ b/doc/fluid/design/concepts/python_data_feeding.md @@ -0,0 +1,130 @@ +# Python Data Feeding + +In the former implementation of Paddle Fluid, there are two ways to feed data: + +- Use `reader_op` in backend C++ side. This method only supports data feeding from recordio files and random data generators, but supports many kinds of `decorated_readers`. For examples, `double_buffer_reader` uses two threads to achieve better performance: one for time-consuming I/O operations, and the other for `Executor::Run()`. See [C++ Data Feeding](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/cpp_data_feeding.md) for details. + +- Feed data directly using `DataFeeder.feed()` in Python codes. It is more flexible than the first way. Many kinds of preprocessing steps can be performed before feeding using Python or any other languages, instead of adding many uncommon `operators` in C++ side. But this method is less efficient: the program cannot read the next mini-batch data before `Executor::Run()` ends. Moreover, `decorated_readers` such as `double_buffer_reader` cannot be used for better performance. + +In this document, we design a Python Data Feeding process combining the efficiency of the first way and the flexibility of the second way. A data queue `LoDTensorBlockingQueue` is designed to be shared by the Python and C++ side, while `LoDTensorArray` is pushed into the queue in Python side and `reader_op` in C++ side reads out the data from the queue. + + +## Design of LoDTensorBlockingQueue +`LoDTensorBlockingQueue` is a blocking queue with a fixed `capacity` and accepts `std::vector` with shapes indicated by `dims`. Since `LoDTensorBlockingQueue` must be constructed using `capacity` and `dims`, it cannot be a `Variable` type. Therefore, a `LoDTensorBlockingQueueHolder` is designed to defer construction of `LoDTensorBlockingQueue`. + +```C++ +class LoDTensorBlockingQueueHolder; + +class LoDTensorBlockingQueue { + friend class LoDTensorBlockingQueueHolder; + private: + // `LoDTensorBlockingQueue` can only be constructed by + // `LoDTensorBlockingQueueHolder::InitOnce()` + LoDTensorBlockingQueue(size_t capacity, const std::vector& dims); + + public: + size_t Size() const { return queue_.Size(); } // Get the current size of the queue + + size_t Cap() const { return queue_.Cap(); }// Get the capacity of the queue + + void Close() { return queue_.Close(); } + + bool IsClosed() const { return queue_.IsClosed(); } + + // Block if Size() == Cap() + // Return false only when queue_.IsClosed() == true + bool Push(const std::vector &lod_tensor_vec); + + // Block if Size() == 0. + // *Success == false when queue_.IsClosed() == true + std::vector Pop(bool *success = nullptr); + + private: + // Use reader::BlockingQueue as the inner data structure + BlockingQueue> queue_; + std::vector dims_; +}; + +class LoDTensorBlockingQueueHolder { + public: + // Call the constructor of `LoDTensorBlockingQueue` to create queue_ + // `InitOnce` can only called once, otherwise an exception would raise + void InitOnce(size_t capacity, const std::vector& dims) { + PADDLE_ENFORCE(queue_ == nullptr); + queue_.reset(new LoDTensorBlockingQueue(capacity, dims)); + } + + const std::shared_ptr& GetQueue() const { return queue_; } + + private: + std::shared_ptr queue_; +}; +``` + +There are some major things that must be concerned: +- `LoDTensorBlockingQueueHolder` should be a `Variable` in global scope, so that `reader_op` can find it when reading data. +- A `Variable` of `LoDTensorBlockingQueueHolder` but not `VarDesc` must be created in Python code before `Executor::Run()` so that `Executor::Run()` can get the feeding data when it is called. +- `Create_reader_op` should accept the name of the `LoDTensorBlockingQueueHolder` variable as an input. + + +## Release of the GIL in pybind +`Pybind11::gil_scoped_release` is used to release GIL (Global Interpreter Lock) when `LoDTensorBlockingQueue::Push()` or `Executor::Run()` method are invoked in Python side, making `LoDTensorBlockingQueue::Push()` and `Executor::Run()` run in parallel. + + +## Design of PyReader +`PyReader` is a reader which holds a `LoDTensorBlockingQueue` object. +```C++ +class PyReader : public ReaderBase { + public: + explicit PyReader(const std::shared_ptr& queue); + + void ReadNext(std::vector* out) override { + bool success; + *out = queue_->Pop(&success); + if (!success) out->clear(); + } + + void ReInit() override { return; } + + private: + std::shared_ptr queue_; +}; +``` + + +## Design of CreatePyReaderOp +`CreatePyReaderOp` is used to create the `PyReader` object. It requires an input `blocking_queue` which indicates the name of the `LoDTensorBlockingQueueHolder` variable. +```C++ +class CreatePyReaderOp : public framework::OperatorBase { + public: + using framework::OperatorBase::OperatorBase; + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { + auto* out = scope.FindVar(Output("Out")) + ->template GetMutable(); + if (out->Get() != nullptr) return; + + const std::string& queue_name = Input("blocking_queue"); + auto* queue_holder_var = scope.FindVar(queue_name); + PADDLE_ENFORCE(queue_holder_var != nullptr); + auto* queue_holder = queue_holder_var + ->template GetMutable(); + out->Reset(new PyReader(queue_holder->GetQueue())); + } +}; +``` + +## Design of Python codes +The design of Python codes are as follows. First, we construct a variable of `LoDTensorBlockingQueueHolder` and init it with given parameters, returning the `LoDTensorBlockingQueue` object after initialization. After that, a layer of `CreatePyReaderOp` is constructed and accepts the name of the `LoDTensorBlockingQueueHolder` variable. The `LoDTensorBlockingQueue` object and result of the layer are both returned. +```Python +def py_reader(capacity, shapes): + queue_name = unique_name.generate("lod_tensor_blocking_queue") + var = global_scope().var(feeder_name) # create LoDTensorBlockingQueueHolder Variable + feed_queue = core.init_lod_tensor_blocking_queue(var, capacity, shapes) # init the queue + out = create_var() + create_py_reader_op_with_queue_name( + inputs={'blocking_queue': queue_name}, + outputs={'Out':[out]}) + return out, feed_queue +``` diff --git a/doc/fluid/design/dist_train/dist_train_nccl2.md b/doc/fluid/design/dist_train/dist_train_nccl2.md new file mode 100644 index 0000000000..aa7455ec5d --- /dev/null +++ b/doc/fluid/design/dist_train/dist_train_nccl2.md @@ -0,0 +1,35 @@ +# Distributed Training with NCCL2 + +We design a pattern that can enable training with `ParallelExecutor` and +using [NCCL2](https://developer.nvidia.com/nccl) as it's collective +communication library. + +In `ParallelExecutor` we can use `AllReduce` or `Reduce` and `Broadcast` +to do multi GPU training. And if we initialize NCCL2 communicators as +ranks in a distributed environment, we can simply run the `ParallelExecutor` +as a distributed program! The only thing that may be different than in +the single node version is that we need to broadcast the NCCL unique ID +to all the nodes, and initialize communicators using that ID, so NCCL2 +will know each other as ranks. + +To achieve this feature, we introduce a new operator: `gen_nccl_id` op, +so we are ***not*** "bind to" running NCCL2 with MPI, we can run it in +what ever platform you like. + +It have two running modes: + +1. Generate and broadcast mode, which should be used on trainer 0; +1. Listen and fetch mode, which should be used on trainers other than 0. + +In both two modes, this op can save the NCCL ID into current scope as a +persistable variable, Then we can insert this op at the end of +"startup program" of fluid, so that all workers can get the same ID to +initialize NCCL communicator objects. + + + +The above figure indicates the general process when training with NCCL2 +distributed. Each trainer have the number of communicators equal to the +number of GPUs, but the ranks should match the global ranks number: here +we have total 8 GPUs, so `nranks==8`, for each trainer, the ranks should +be from 0 ~ 3 on trainer 0 and 4 ~ 7 on trainer 1. diff --git a/doc/fluid/design/dist_train/distributed_lookup_table_design.md b/doc/fluid/design/dist_train/distributed_lookup_table_design.md index 9887291389..e284e1ec5c 100644 --- a/doc/fluid/design/dist_train/distributed_lookup_table_design.md +++ b/doc/fluid/design/dist_train/distributed_lookup_table_design.md @@ -1,6 +1,6 @@ # Design Doc: Distributed Lookup Table Operator -A lookup table operator in PaddlePaddle where the table could be out +A distribute lookup table operator in PaddlePaddle where the table could be out of the memory of a computer. ## Background @@ -24,14 +24,14 @@ memory, so we'd need a distributed storage service, which supports the lookup of rows. The following figure illustrates the multiplication of x with two -non-zero elements, or say, two symbols, and a lookup table W: +non-zero elements, or say two symbols, and a lookup table W: ![lookup table](./src/lookup_table.png) ### The Backward Algorithm The backward algorithm computes W'(x) using W(x). W'(x) has the same -scale of size as W(x) and is much smaller than W. +the scale of size as W(x) and is much smaller than W. To optimize W given W', we can do simple SGD update: @@ -44,85 +44,46 @@ $$W = f(W, W')$$ The following figure illustrates the backward pass of the lookup operator: ![lookup table training](./src/lookup_table_training.png) -## Distributed Storage Service - -The forward algorithm requires a distributed storage service for W. -The backward algorithm prefers that the storage system can apply the -optimization algorithm on W. The following two sections describe two -solutions -- the former doesn't require that the storage service can -do optimization, the latter does. - -### Storage Service Doesn't Optimize - -In this design, we use highly-optimized distributed storage, e.g., -memcached, as the storage service, and we run the optimization -algorithm on parameter servers of PaddlePaddle. The following figure -illustrates the training process. - - - - - -Each trainer runs the forward and backward passes using their local -data: - -1. In the forward pass, when a trainer runs the forward algorithm of a - lookup operator, it retrieves W(x) from the storage service. -1. The trainer computes W'(x) in the backward pass using W(x). - -During the global update process: - -1. Each trainer uploads its W'(x) to parameter servers. -1. The parameter server runs the optimization algorithm, e.g., the - Adam optimization algorithm, which requires that - 1. The parameter server retrieves W(x) from memcached, and - 1. The parameter server pushes $\Delta W(x)=f(W(x), lambda \sum_j - W'(x))$ to memcached, where $f$ denotes the optimization - algorithm. - -### Storage Service Does Optimize - -This design is very similar to the above one, except that the -optimization algorithm $f$ runs on the storage service. - -- Pro: parameter servers do not retrieve W(x) from the storage - service, thus saves half network communication. -- Con: the storage service needs to be able to run the optimization - algorithm. - -## Conclusion - -Let us do the "storage service does not optimize" solution first, as a -baseline at least, because it is easier to use a well-optimized -distributed storage service like memcached. We can do the "storage -service does optimize" solution later or at the same time, which, if -implemented carefully, should have better performance than the former. +## Distributed Lookup Table +### Problem 1: The lookup table may be very large. + + In the condition like the search engine and recommendation system, the number of feature Id may be very large, say 100,000,000,000, then for a float value lookup table of size 8, the total size of the table is: + + ``` + 100,000,000,000 * 8 * 4(Bytes) = 2980.23 GB + ``` + +### Solution: Distributed storage + +1. Paddle use [SelectedRows](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/modules/selected_rows.md) as the storage format for the lookup table, the lookup table parameter will be split to multi-machine according to the hash of the feature ID, and data will also be split and send to the same machine to prefetch the parameter. + +1. For common parameters, the trainer will get the whole parameter for training, but for the big lookup table, the trainer can not store the whole parameter. Because the input data feature is very sparse, every time we only need a few parameters for training, so we use `prefetch_op` to only prefetch the parameter needed to trainer. + +### Problem 2. The Id in the lookup table is not sure before training. + + The feature Id is calculated by the hash function because the feature data source is so large, we can not get all the Id before training. So we can not initialize the table before training. + +### Solution: Id auto growth + +At the beginning of training, paddle only malloc the memory for the lookup table at parameter server side, the Id and it's value will not be initialized. During training, when a parameter server received an Id, if it is already in the lookup table, it will return the existing parameter, if the Id does not exist, paddle will add it into the lookup table and initialize the value for it. + +### Problem 3: parameter load and save + +For common parameters, paddle use trainer to save and load them. But for distributed lookup table, trainer cannot do this because it's large size. + +### Solution: Parameter server side save and load + +Paddle support parameter server side save and load for distribute lookup table. Each machine of parameter servers will only save and load part of the whole table. + +## Architecture +The whole architecture of the distribute lookup table is as below: + +### Training steps: +1. Read a batch of data, the data is feature ids. +1. The input ids will be split by `split_ids_op` with the same hash function of the lookup table. +1. The `prefetch_op` use the split result to prefetch parameters back from the lookup table. +1. Run forward-backward to get the gradient of the lookup table. +1. `split_ids_op` split the gradient and then use `send_op` to the parameter server. +1. parameter server update the table with the received gradient. + +![distribute lookup table](./src/distributed_lookup_table.jpeg) diff --git a/doc/fluid/design/dist_train/src/distributed_lookup_table.graffle b/doc/fluid/design/dist_train/src/distributed_lookup_table.graffle new file mode 100644 index 0000000000..65dfdbbacd Binary files /dev/null and b/doc/fluid/design/dist_train/src/distributed_lookup_table.graffle differ diff --git a/doc/fluid/design/dist_train/src/distributed_lookup_table.jpeg b/doc/fluid/design/dist_train/src/distributed_lookup_table.jpeg new file mode 100644 index 0000000000..5353a16fd3 Binary files /dev/null and b/doc/fluid/design/dist_train/src/distributed_lookup_table.jpeg differ diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle new file mode 100644 index 0000000000..96ca6d48f4 Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.graffle differ diff --git a/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png new file mode 100644 index 0000000000..afa25ab3b4 Binary files /dev/null and b/doc/fluid/design/dist_train/src/fluid_lookup_remote_table.png differ diff --git a/doc/fluid/design/dist_train/src/ncc2_design.graffle b/doc/fluid/design/dist_train/src/ncc2_design.graffle new file mode 100644 index 0000000000..7d2753bbb0 Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.graffle differ diff --git a/doc/fluid/design/dist_train/src/ncc2_design.png b/doc/fluid/design/dist_train/src/ncc2_design.png new file mode 100644 index 0000000000..da0d5ee81f Binary files /dev/null and b/doc/fluid/design/dist_train/src/ncc2_design.png differ diff --git a/doc/fluid/design/multi_devices/kernel_selection.md b/doc/fluid/design/multi_devices/kernel_selection.md index 967317d5d2..4d2aab87b8 100644 --- a/doc/fluid/design/multi_devices/kernel_selection.md +++ b/doc/fluid/design/multi_devices/kernel_selection.md @@ -74,10 +74,10 @@ void OperatorWithKernel::Run( auto kernel_type_for_var = this->GetKernelTypeForVar(...); if (kernel_type_for_var.place_ != expected_kernel_key.place_) { auto* trans_var = new_scope.Var(var_name); - auto* out = DataTransform(expected_kernel_key, + auto* out = TransformData(expected_kernel_key, kernel_type_for_var, *tensor_in); - CopyVariableWithTensor(...); + SetTensorToVariable(...); } } diff --git a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md index b99b90056b..55326940ce 100644 --- a/doc/fluid/howto/cluster/fluid_cluster_train_cn.md +++ b/doc/fluid/howto/cluster/fluid_cluster_train_cn.md @@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book 第二步,启动Parameter Server: ```bash -PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py +PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py ``` 执行命令后请等待出现提示: ```Server listening on 192.168.1.2:6174 ```, 表示Paramter Server已经正常启动。 第三步,启动Trainer: ```bash -PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py +PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IPP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py ``` 由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。 diff --git a/doc/fluid/howto/cluster/fluid_recordio.md b/doc/fluid/howto/cluster/fluid_recordio.md index 55ce63ec19..92859e8f62 100644 --- a/doc/fluid/howto/cluster/fluid_recordio.md +++ b/doc/fluid/howto/cluster/fluid_recordio.md @@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id): ret_list.append(f) return ret_list -trainers = int(os.getenv("TRAINERS")) -trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) +trainers = int(os.getenv("PADDLE_TRAINERS")) +trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) data_file = fluid.layers.io.open_files( filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0), thread_num=1, diff --git a/doc/fluid/howto/inference/build_and_install_lib_cn.rst b/doc/fluid/howto/inference/build_and_install_lib_cn.rst index c8d9992fcc..84005b54e0 100644 --- a/doc/fluid/howto/inference/build_and_install_lib_cn.rst +++ b/doc/fluid/howto/inference/build_and_install_lib_cn.rst @@ -13,6 +13,7 @@ cpu_noavx_openblas `fluid.tgz `_ cuda8.0_cudnn5_avx_mkl `fluid.tgz `_ cuda8.0_cudnn7_avx_mkl `fluid.tgz `_ +cuda9.0_cudnn7_avx_mkl `fluid.tgz `_ ====================== ======================================== 从源码编译 diff --git a/doc/fluid/howto/optimization/host_memory_profiling_cn.md b/doc/fluid/howto/optimization/host_memory_profiling_cn.md index 9b55a66ded..7fb0883dd9 100644 --- a/doc/fluid/howto/optimization/host_memory_profiling_cn.md +++ b/doc/fluid/howto/optimization/host_memory_profiling_cn.md @@ -1,4 +1,4 @@ -## 堆内存分析和优化 +# 堆内存分析和优化 计算机程序都可能有内存泄漏的风险。**内存泄漏**一般是由于程序在堆(heap)上分配了内存而没有释放,随着程序的运行占用的内存越来越大,一方面会影响程序的稳定性,可能让运行速度越来越慢,或者造成oom,甚至会影响运行程序的机器的稳定性,造成宕机。 @@ -20,11 +20,11 @@ Paddle也提供了基于gperftool的[CPU性能分析教程](https://github.com/P 对于堆内存的分析,主要用到thread-caching malloc和heap-profiling using tcmalloc。 -## 使用流程 -#### 环境 +## 环境 + 本教程基于paddle提供的Docker开发环境paddlepaddle/paddle:latest-dev,基于Ubuntu 16.04.4 LTS环境。 -#### 使用流程 +## 使用流程 - 安装google-perftools diff --git a/doc/fluid/howto/optimization/timeline_cn.md b/doc/fluid/howto/optimization/timeline_cn.md new file mode 100644 index 0000000000..5d061e1c00 --- /dev/null +++ b/doc/fluid/howto/optimization/timeline_cn.md @@ -0,0 +1,26 @@ +# 如何使用timeline工具做性能分析 + +1. 在训练的主循环外加上`with profiler.profiler(...)`。运行之后,代码会在`/tmp/profile`目录下生成一个profile的记录文件。 + + **提示:** + 请不要在timeline记录信息时运行太多次迭代,因为timeline中的记录数量和迭代次数是成正比的。 + + ```python + with profiler.profiler('All', 'total', '/tmp/profile') as prof: + for pass_id in range(pass_num): + for batch_id, data in enumerate(train_reader()): + exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[]) + ... + ``` + +1. 运行`python paddle/tools/timeline.py`来处理`/tmp/profile`,这个程序默认会生成一个`/tmp/timeline`文件,你也可以用命令行参数来修改这个路径,请参考[timeline.py](https://github.com/PaddlePaddle/Paddle/blob/develop/tools/timeline.py)。 + +1. 打开chrome浏览器,访问,用`load`按钮来加载生成的`timeline`文件。 + + ![chrome tracing](./tracing.jpeg) + +1. 结果如下图所示,可以放到来查看timetime的细节信息。 + + ![chrome timeline](./timeline.jpeg) diff --git a/doc/fluid/howto/optimization/timeline.md b/doc/fluid/howto/optimization/timeline_en.md similarity index 100% rename from doc/fluid/howto/optimization/timeline.md rename to doc/fluid/howto/optimization/timeline_en.md diff --git a/doc/v2/design/cluster_train/large_model_dist_train.md b/doc/v2/design/cluster_train/large_model_dist_train.md index 0c4b5bc24c..edb0245ea0 100644 --- a/doc/v2/design/cluster_train/large_model_dist_train.md +++ b/doc/v2/design/cluster_train/large_model_dist_train.md @@ -52,7 +52,7 @@ In `trainer_internal.cpp:L93 trainOneBatch`: When doing actual network forward and backward, at the beginning of each batch, the trainer will try to download one row of data from pserver. -In `trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`: +In `legacy/trainer/RemoteParameterUpdater.cpp`: `parameterUpdater_->getParametersRemote();`: ```c++ if (fullSize) { diff --git a/doc/v2/design/interface/00.why_plain_c.md b/doc/v2/design/interface/00.why_plain_c.md index a144309334..826ff3141b 100644 --- a/doc/v2/design/interface/00.why_plain_c.md +++ b/doc/v2/design/interface/00.why_plain_c.md @@ -65,7 +65,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix matrix, 而在CPP里面实现这个C的接口,文件 `paddle_matrix.cpp` ```cpp -#include "paddle/math/matrix.h" +#include "paddle/legacy/math/matrix.h" extern "C" paddle_error paddle_matrix_shape(paddle_matrix matrix, uint64_t *width, diff --git a/doc/v2/design/mkl/mkldnn.md b/doc/v2/design/mkl/mkldnn.md index bd5bcf6f67..4876de0045 100644 --- a/doc/v2/design/mkl/mkldnn.md +++ b/doc/v2/design/mkl/mkldnn.md @@ -18,20 +18,20 @@ Figure 1. PaddlePaddle on IA 具体的完成状态可以参见[这里](https://github.com/PaddlePaddle/Paddle/projects/21)。 ## Contents - -- [Overview](#overview) -- [Actions](#actions) - - [CMake](#cmake) - - [Matrix](#matrix) - - [Layers](#layers) - - [Activations](#activations) - - [Parameters](#parameters) - - [Gradients](#gradients) - - [Unit Tests](#unit-tests) - - [Python API](#python-api) - - [Benchmarking](#benchmarking) - - [Others](#others) -- [Design Concerns](#design-concerns) + +- [Overview](#overview) +- [Actions](#actions) + - [CMake](#cmake) + - [Matrix](#matrix) + - [Layers](#layers) + - [Activations](#activations) + - [Parameters](#parameters) + - [Gradients](#gradients) + - [Unit Tests](#unit-tests) + - [Python API](#python-api) + - [Benchmarking](#benchmarking) + - [Others](#others) +- [Design Concerns](#design-concerns) ## Overview @@ -218,20 +218,20 @@ if use_mkldnn 我们总结出一些特别需要注意的点: 1. 使用**deviceId_**。为了尽可能少的在父类Layer中添加变量或者函数, -我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2`为`MKLDNNLayer`特有的设备ID。 -2. 重写父类Layer的**init**函数,修改`deviceId_`为`-2`,代表这个layer是用于跑在MKL-DNN的环境下。 +我们决定使用已有的`deviceId_`变量来区分layer的属性,定义`-2`为`MKLDNNLayer`特有的设备ID。 +2. 重写父类Layer的**init**函数,修改`deviceId_`为`-2`,代表这个layer是用于跑在MKL-DNN的环境下。 3. 创建`MKLDNNBase`,定义一些除了layer和memory相关的类和函数。 -包括MKL-DNN会用到`MKLDNNStream`和`CPUEngine`,和未来可能还会用到`FPGAEngine`等。 +包括MKL-DNN会用到`MKLDNNStream`和`CPUEngine`,和未来可能还会用到`FPGAEngine`等。 4. 如果MKL-DNN layer的后面接有cpu device,那么就会使`output_.value`与`extOutVal_`共享内存, 同时数据格式就是`NCHW`,这样下一个cpu device就能拿到正确的数据。 在有普通的CPU layer时, `extOutVal_`和`extOutGrad_`的格式始终是`NCHW`或者`NC`。 ## References 1. [MKL small library](https://github.com/01org/mkl-dnn#linking-your-application)是[Intel MKL](https://software.intel.com/en-us/mkl)的一个子集。 -主要包括了深度学习相关的数学原语与操作,一般由MKL-DNN在发布[新版本](https://github.com/01org/mkl-dnn/releases)时一起更新。 +主要包括了深度学习相关的数学原语与操作,一般由MKL-DNN在发布[新版本](https://github.com/01org/mkl-dnn/releases)时一起更新。 2. [MKL-DNN System Requirements](https://github.com/01org/mkl-dnn#system-requirements)。 目前在PaddlePaddle中,仅会在支持AVX2指令集及以上的机器才使用MKL-DNN。 3. [原来的方案](https://github.com/PaddlePaddle/Paddle/pull/3096)会引入**nextLayer**的信息。 -但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。 +但是在PaddlePaddle中,无论是重构前的layer还是重构后的op,都不会想要知道next layer/op的信息。 4. MKL-DNN的高性能格式与PaddlePaddle原有的`NCHW`不同(PaddlePaddle中的cuDNN部分使用的也是`NCHW`,所以不存在这个问题)。 -所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。 +所以需要引入一个转换方法,并且只需要在必要的时候转换这种格式,才能更好的发挥MKL-DNN的性能。 diff --git a/doc/v2/dev/new_layer_cn.rst b/doc/v2/dev/new_layer_cn.rst index 3115654b2b..e5a1434612 100644 --- a/doc/v2/dev/new_layer_cn.rst +++ b/doc/v2/dev/new_layer_cn.rst @@ -58,7 +58,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。 实现C++类 =================== -一个网络层的C++类需要实现初始化,前向和后向。全连接层的实现位于:code:`paddle/gserver/layers/FullyConnectedLayer.h`及:code:`paddle/gserver/layers/FullyConnectedLayer.cpp`。这里我们展示一份简化过的代码。 +一个网络层的C++类需要实现初始化,前向和后向。全连接层的实现位于:code:`paddle/legacy/gserver/layers/FullyConnectedLayer.h`及:code:`paddle/legacy/gserver/layers/FullyConnectedLayer.cpp`。这里我们展示一份简化过的代码。 这个类需要继承 :code:`paddle::Layer` 这个基类,并且需要重写基类中的以下几个虚函数: @@ -153,7 +153,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。 - 每个层在其 :code:`forward` 函数的开头必须调用 :code:`Layer::forward(passType);` 。 - 之后使用 :code:`reserveOutput(batchSize, size);` 为输出分配内存。由于我们支持训练数据有不同的批次大小,所以这一步是必要的。 :code:`reserveOutput` 会相应地改变输出的尺寸。为了保证效率,如果需要扩大矩阵,我们会重新分配内存;如果需要缩减矩阵,我们会继续使用现有的内存块。 -- 之后使用矩阵运算函数来计算 :math:`\sum_i W_i x + b`。:code:`getInput(i).value` 返回第i个输入矩阵。每个输入都是一个 :math:`batchSize \times dim` 的矩阵,每行表示一个批次中的单个输入。对于我们支持的全部矩阵操作,请参考 :code:`paddle/math/Matrix.h`和:code:`paddle/math/BaseMatrix.h` 。 +- 之后使用矩阵运算函数来计算 :math:`\sum_i W_i x + b`。:code:`getInput(i).value` 返回第i个输入矩阵。每个输入都是一个 :math:`batchSize \times dim` 的矩阵,每行表示一个批次中的单个输入。对于我们支持的全部矩阵操作,请参考 :code:`paddle/legacy/math/Matrix.h`和:code:`paddle/legacy/math/BaseMatrix.h` 。 - 最终,使用 :code:`forwardActivation();` 进行激活操作。这会自动进行网络配置中声明的激活操作。 @@ -262,7 +262,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。 REGISTER_LAYER(fc, FullyConnectedLayer); } -若 :code:`cpp` 被放在 :code:`paddle/gserver/layers` 目录下,其会自动被加入编译列表。 +若 :code:`cpp` 被放在 :code:`paddle/legacy/gserver/layers` 目录下,其会自动被加入编译列表。 写梯度检查单元测试 @@ -270,7 +270,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。 写梯度检查单元测试是一个验证新实现的层是否正确的相对简单的办法。梯度检查单元测试通过有限差分法来验证一个层的梯度。首先对输入做一个小的扰动 :math:`\Delta x` ,然后观察到输出的变化为 :math:`\Delta y` ,那么,梯度就可以通过这个方程计算得到 :math:`\frac{\Delta y}{\Delta x }` 。之后,再用这个梯度去和 :code:`backward` 函数得到的梯度去对比,以保证梯度计算的正确性。需要注意的是梯度检查仅仅验证了梯度的计算,并不保证 :code:`forward` 和 :code:`backward` 函数的实现是正确的。你需要一些更复杂的单元测试来保证你实现的网络层是正确的。 -所有网络层的梯度检查单测都位于 :code:`paddle/gserver/tests/test_LayerGrad.cpp` 。我们建议你在写新网络层时把测试代码放入新的文件中。下面列出了全连接层的梯度检查单元测试。它包含以下几步: +所有网络层的梯度检查单测都位于 :code:`paddle/legacy/gserver/tests/test_LayerGrad.cpp` 。我们建议你在写新网络层时把测试代码放入新的文件中。下面列出了全连接层的梯度检查单元测试。它包含以下几步: + 生成网络层配置。网络层配置包含以下几项: - 偏置参数的大小。(例子中是4096) @@ -322,7 +322,7 @@ PaddlePaddle的base layer类可以自动计算上面的导数。 } } -如果你要为了测试而增加新的文件,例如 :code:`paddle/gserver/tests/testFCGrad.cpp` ,你需要把该文件加入 :code:`paddle/gserver/tests/CMakeLists.txt` 中。下面给出了一个例子。当你执行命令 :code:`make tests` 时,所有的单测都会被执行一次。注意,有些层可能需要高精度来保证梯度检查单测正确执行。你需要在配置cmake时将 :code:`WITH_DOUBLE` 设置为 `ON` 。 +如果你要为了测试而增加新的文件,例如 :code:`paddle/legacy/gserver/tests/testFCGrad.cpp` ,你需要把该文件加入 :code:`paddle/legacy/gserver/tests/CMakeLists.txt` 中。下面给出了一个例子。当你执行命令 :code:`make tests` 时,所有的单测都会被执行一次。注意,有些层可能需要高精度来保证梯度检查单测正确执行。你需要在配置cmake时将 :code:`WITH_DOUBLE` 设置为 `ON` 。 .. code-block:: bash diff --git a/doc/v2/dev/new_layer_en.rst b/doc/v2/dev/new_layer_en.rst index b05bb45f11..ad72373880 100644 --- a/doc/v2/dev/new_layer_en.rst +++ b/doc/v2/dev/new_layer_en.rst @@ -58,7 +58,7 @@ Finally we can use chain rule to calculate :math:`\frac{\partial z}{\partial x}` Implement C++ Class =================== -The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below. +The C++ class of the layer implements the initialization, forward, and backward part of the layer. The fully connected layer is at :code:`paddle/legacy/gserver/layers/FullyConnectedLayer.h` and :code:`paddle/legacy/gserver/layers/FullyConnectedLayer.cpp`. We list simplified version of the code below. It needs to derive the base class :code:`paddle::Layer`, and it needs to override the following functions: @@ -154,7 +154,7 @@ The implementation of the forward part has the following steps. - Every layer must call :code:`Layer::forward(passType);` at the beginning of its :code:`forward` function. - Then it allocates memory for the output using :code:`reserveOutput(batchSize, size);`. This step is necessary because we support the batches to have different batch sizes. :code:`reserveOutput` will change the size of the output accordingly. For the sake of efficiency, we will allocate new memory if we want to expand the matrix, but we will reuse the existing memory block if we want to shrink the matrix. -- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieve the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents an single input in a batch. For a complete lists of supported matrix operations, please refer to :code:`paddle/math/Matrix.h` and :code:`paddle/math/BaseMatrix.h`. +- Then it computes :math:`\sum_i W_i x + b` using Matrix operations. :code:`getInput(i).value` retrieve the matrix of the i-th input. Each input is a :math:`batchSize \times dim` matrix, where each row represents an single input in a batch. For a complete lists of supported matrix operations, please refer to :code:`paddle/legacy/math/Matrix.h` and :code:`paddle/legacy/math/BaseMatrix.h`. - Finally it applies the activation function using :code:`forwardActivation();`. It will automatically applies the corresponding activation function specifies in the network configuration. @@ -263,7 +263,7 @@ Finally, you can use :code:`REGISTER_LAYER(fc, FullyConnectedLayer);` to registe REGISTER_LAYER(fc, FullyConnectedLayer); } -If the :code:`cpp` file is put into :code:`paddle/gserver/layers`, it will be automatically added to the compilation list. +If the :code:`cpp` file is put into :code:`paddle/legacy/gserver/layers`, it will be automatically added to the compilation list. Write Gradient Check Unit Test @@ -271,7 +271,7 @@ Write Gradient Check Unit Test An easy way to verify the correctness of new layer's implementation is to write a gradient check unit test. Gradient check unit test utilizes finite difference method to verify the gradient of a layer. It modifies the input with a small perturbation :math:`\Delta x` and observes the changes of output :math:`\Delta y`, the gradient can be computed as :math:`\frac{\Delta y}{\Delta x }`. This gradient can be compared with the gradient computed by the :code:`backward` function of the layer to ensure the correctness of the gradient computation. Notice that the gradient check only tests the correctness of the gradient computation, it does not necessarily guarantee the correctness of the implementation of the :code:`forward` and :code:`backward` function. You need to write more sophisticated unit tests to make sure your layer is implemented correctly. -All the gradient check unit tests are located in :code:`paddle/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient test of the gradient check unit test of the fully connected layer is listed below. It has the following steps. +All the gradient check unit tests are located in :code:`paddle/legacy/gserver/tests/test_LayerGrad.cpp`. You are recommended to put your test into a new test file if you are planning to write a new layer. The gradient test of the gradient check unit test of the fully connected layer is listed below. It has the following steps. + Create layer configuration. A layer configuration can include the following attributes: - size of the bias parameter. (4096 in our example) @@ -323,7 +323,7 @@ All the gradient check unit tests are located in :code:`paddle/gserver/tests/tes } } -If you are creating a new file for the test, such as :code:`paddle/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake. +If you are creating a new file for the test, such as :code:`paddle/legacy/gserver/tests/testFCGrad.cpp`, you need to add the file to :code:`paddle/legacy/gserver/tests/CMakeLists.txt`. An example is given below. All the unit tests will run when you execute the command :code:`make tests`. Notice that some layers might need high accuracy for the gradient check unit tests to work well. You need to configure :code:`WITH_DOUBLE` to `ON` when configuring cmake. .. code-block:: bash @@ -339,7 +339,7 @@ If you are creating a new file for the test, such as :code:`paddle/gserver/tests Implement Python Wrapper ======================== -Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file :code:`python/paddle/trainer/config_parser.py`. An example of the Python wrapper for fully connected layer is listed below. It has the following steps: +Implementing Python wrapper allows us to use the added layer in configuration files. All the Python wrappers are in file :code:`python/paddle/legacy/trainer/config_parser.py`. An example of the Python wrapper for fully connected layer is listed below. It has the following steps: - Use :code:`@config_layer('fc')` at the decorator for all the Python wrapper class. :code:`fc` is the identifier of the layer. - Implements :code:`__init__` constructor function. diff --git a/doc/v2/faq/build_and_install/index_cn.rst b/doc/v2/faq/build_and_install/index_cn.rst index f292684fb5..0d64477728 100644 --- a/doc/v2/faq/build_and_install/index_cn.rst +++ b/doc/v2/faq/build_and_install/index_cn.rst @@ -213,3 +213,12 @@ virtualenv本身也是Python的一个包,可以用pip进行安装: 保存并关闭文件。 这样,每次打开终端时就会自动启动名为‘paddle’的Python环境了。 + +10. 通过pip安装的PaddlePaddle在 :code:`import paddle.fluid` 报找不到 :code:`libmkldnn.so` 或 :code:`libmklml_intel.so` +------------------------------------------------------------------------------------------ +出现这种问题的原因是在导入 :code:`paddle.fluid` 时需要加载 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so`, +但是系统没有找到该文件。一般通过pip安装PaddlePaddle时会将 :code:`libmkldnn.so` 和 :code:`libmklml_intel.so` +拷贝到 :code:`/usr/local/lib` 路径下,所以解决办法是将该路径加到 :code:`LD_LIBRARY_PATH` 环境变量下, +即: :code:`export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH` 。 + +**注意**:如果是在虚拟环境中安装PaddlePaddle, :code:`libmkldnn.so` 和 :code:`libmklml_intel.so` 可能不在 :code:`/usr/local/lib` 路径下。 \ No newline at end of file diff --git a/doc/v2/faq/parameter/index_cn.rst b/doc/v2/faq/parameter/index_cn.rst index 1fa4b3e131..987e8cf088 100644 --- a/doc/v2/faq/parameter/index_cn.rst +++ b/doc/v2/faq/parameter/index_cn.rst @@ -196,6 +196,6 @@ PaddlePaddle保存的模型参数文件内容由16字节头信息和网络参数 obj="process", args={"src_dict_path": src_dict_path}) -完整源码可参考 `sequence_recurrent `_ 示例。 +完整源码可参考 `sequence_recurrent `_ 示例。 diff --git a/doc/v2/howto/capi/compile_paddle_lib_cn.md b/doc/v2/howto/capi/compile_paddle_lib_cn.md index e223fd33a8..2c87e9afc6 100644 --- a/doc/v2/howto/capi/compile_paddle_lib_cn.md +++ b/doc/v2/howto/capi/compile_paddle_lib_cn.md @@ -18,7 +18,7 @@ cpu_avx_openblas -暂无 +paddle.tgz cpu_noavx_openblas @@ -35,7 +35,12 @@ cuda8.0_cudnn7_avx_mkl paddle.tgz - + + +cuda9.0_cudnn7_avx_mkl +paddle.tgz + + ### 从源码编译 diff --git a/doc/v2/howto/capi/compile_paddle_lib_en.md b/doc/v2/howto/capi/compile_paddle_lib_en.md index 6212a30811..3fa8a18a9f 100644 --- a/doc/v2/howto/capi/compile_paddle_lib_en.md +++ b/doc/v2/howto/capi/compile_paddle_lib_en.md @@ -17,7 +17,7 @@ cpu_avx_openblas -- +paddle.tgz cpu_noavx_openblas @@ -34,7 +34,12 @@ cuda8.0_cudnn7_avx_mkl paddle.tgz - + + +cuda9.0_cudnn7_avx_mkl +paddle.tgz + + ### From source diff --git a/doc/v2/howto/capi/workflow_of_capi_cn.md b/doc/v2/howto/capi/workflow_of_capi_cn.md index 3acdbae28e..db1568a2af 100644 --- a/doc/v2/howto/capi/workflow_of_capi_cn.md +++ b/doc/v2/howto/capi/workflow_of_capi_cn.md @@ -28,9 +28,9 @@ ### 准备预测模型 -准备预测模型部分,我们以手写数字识别任务为例进行介绍。手写数字识别任务定义了一个含有[两个隐层的简单全连接网络](https://github.com/PaddlePaddle/book/blob/develop/02.recognize_digits/README.cn.md#softmax回归softmax-regression),网络接受一幅图片作为输入,将图片分类到 0 ~ 9 类别标签之一。完整代码可以查看[此目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense) 中的相关脚本。 +准备预测模型部分,我们以手写数字识别任务为例进行介绍。手写数字识别任务定义了一个含有[两个隐层的简单全连接网络](https://github.com/PaddlePaddle/book/blob/develop/02.recognize_digits/README.cn.md#softmax回归softmax-regression),网络接受一幅图片作为输入,将图片分类到 0 ~ 9 类别标签之一。完整代码可以查看[此目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense) 中的相关脚本。 -调用C-API开发预测程序需要一个训练好的模型,运行[MNIST手写数字识别目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)下的[mnist_v2.py](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/examples/model_inference/dense/mnist_v2.py)脚本,在终端执行`python mnist_v2.py`,会使用 PaddlePaddle 内置的 [MNIST 数据集](http://yann.lecun.com/exdb/mnist/)进行训练。训练好的模型默认保存在当前运行目录下的`models`目录中。 +调用C-API开发预测程序需要一个训练好的模型,运行[MNIST手写数字识别目录](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense)下的[mnist_v2.py](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py)脚本,在终端执行`python mnist_v2.py`,会使用 PaddlePaddle 内置的 [MNIST 数据集](http://yann.lecun.com/exdb/mnist/)进行训练。训练好的模型默认保存在当前运行目录下的`models`目录中。 下面,我们将训练结束后存储下来的模型转换成预测模型。 @@ -48,7 +48,7 @@ dump_v2_config(predict, "trainer_config.bin", True) ``` - 对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例,[`mnist_v2.py`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/mnist_v2.py)脚本集成了序列化神经网络结构的过程,可以直接运行 `python mnist_v2.py --task dump_config` 对神经网络结构进行序列化,结果会写入当前运行目录下的`trainer_config.bin`文件中。 + 对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense)这个示例,[`mnist_v2.py`](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py)脚本集成了序列化神经网络结构的过程,可以直接运行 `python mnist_v2.py --task dump_config` 对神经网络结构进行序列化,结果会写入当前运行目录下的`trainer_config.bin`文件中。 使用这种方式,需要**在运行时将神经网络的多个可学习参数放在同一个目录中**,C-API可以通过分别指定序列化后的网络结构文件和参数目录来加载训练好的模型。 @@ -68,7 +68,7 @@ merge_v2_model(net, param_file, output_file) ``` - 对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense)这个示例,可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式,运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。 + 对[手写数字识别](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense)这个示例,可直接运行 `python` [merge_v2_model.py](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py)。序列化结果会写入当前运行目录下的`output.paddle.model`文件中。使用这种方式,运行时C-API可以通过指定`output.paddle.model`文件的路径来加载预测模型。 #### 注意事项 1. 为使用C-API,在调用`dump_v2_config`序列化神经网络结构时,参数`binary`必须指定为`True`。 @@ -77,10 +77,10 @@ ### 编写预测代码 -预测代码更多详细示例代码请参考[C-API使用示例](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/capi/examples/model_inference) 目录下的代码示例。这一节对图1中预测代码编写的5个步骤进行介绍和说明。 +预测代码更多详细示例代码请参考[C-API使用示例](https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/legacy/capi/examples/model_inference) 目录下的代码示例。这一节对图1中预测代码编写的5个步骤进行介绍和说明。 #### step 1. 初始化PaddlePaddle运行环境 -第一步需调用[`paddle_init`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/main.h#L27) 初始化PaddlePaddle运行环境,该接口接受两个参数:参数的个数和参数列表。 +第一步需调用[`paddle_init`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/main.h#L27) 初始化PaddlePaddle运行环境,该接口接受两个参数:参数的个数和参数列表。 #### step2. 加载模型 @@ -88,8 +88,8 @@ 概念上,在 PaddlePaddle 内部,一个GradientMachine类的对象管理着一组计算层(PaddlePaddle Layers)来完成前向和反向计算,并处理与之相关的所有细节。在调用C-API预测时,只需进行前向计算而无需调用反向计算。这篇文档之后部分会使用`gradient machine`来特指调用PaddlePaddle C-API创建的GradientMachine类的对象。每一个 `gradient machine` 都会管理维护一份训练好的模型,下面是C-API提供的,两种常用的模型加载方式: -1. 调用[`paddle_gradient_machine_load_parameter_from_disk`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L61)接口,从磁盘加载预测模型。这时`gradient machine`会独立拥有一份训练好的模型; -1. 调用[`paddle_gradient_machine_create_shared_param`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L88)接口,与其它`gradient machine`的共享已经加载的预测模型。这种情况多出现在使用多线程预测时,通过多个线程共享同一个模型来减少内存开销。可参考[此示例](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/examples/model_inference/multi_thread/main.c)。 +1. 调用[`paddle_gradient_machine_load_parameter_from_disk`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/gradient_machine.h#L61)接口,从磁盘加载预测模型。这时`gradient machine`会独立拥有一份训练好的模型; +1. 调用[`paddle_gradient_machine_create_shared_param`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/gradient_machine.h#L88)接口,与其它`gradient machine`的共享已经加载的预测模型。这种情况多出现在使用多线程预测时,通过多个线程共享同一个模型来减少内存开销。可参考[此示例](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/examples/model_inference/multi_thread/main.c)。 - 注意事项 @@ -117,7 +117,7 @@ C-API支持的所有输入数据类型和他们的组织方式,请参考“输 #### step 4. 前向计算 -完成上述准备之后,通过调用 [`paddle_gradient_machine_forward`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/capi/gradient_machine.h#L73) 接口完成神经网络的前向计算。 +完成上述准备之后,通过调用 [`paddle_gradient_machine_forward`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/legacy/capi/gradient_machine.h#L73) 接口完成神经网络的前向计算。 #### step 5. 清理 diff --git a/doc/v2/howto/optimization/gpu_profiling_cn.rst b/doc/v2/howto/optimization/gpu_profiling_cn.rst index 25bcaccb69..f2396716bd 100644 --- a/doc/v2/howto/optimization/gpu_profiling_cn.rst +++ b/doc/v2/howto/optimization/gpu_profiling_cn.rst @@ -50,12 +50,12 @@ GPU则还需要高并行性,才能发挥其全部能力。这正是它们速 **nvprof** 是Nvidia性能分析工具, **nvvp** 则是带GUI的Nvidia可视化性能分析工具。 在这个教程中,我们主要会介绍nvprof和nvvp。 -:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate +:code:`test_GpuProfiler` from :code:`paddle/legacy/math/tests` directory will be used to evaluate above profilers. -:code:`paddle/math/test` 目录中的 :code:`test_GpuProfiler` 就是用于展示上述分析工具的用法。 +:code:`paddle/legacy/math/test` 目录中的 :code:`test_GpuProfiler` 就是用于展示上述分析工具的用法。 -.. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp +.. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :linenos: @@ -83,7 +83,7 @@ program crashes when CPU version of PaddlePaddle invokes them. 1. 加入 :code:`REGISTER_TIMER_INFO` 和 :code:`printAllStatus` 函数(如高亮部分)。 - .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :emphasize-lines: 8-12,14 @@ -101,8 +101,8 @@ program crashes when CPU version of PaddlePaddle invokes them. .. code-block:: bash :emphasize-lines: 1,12-15 - > ./paddle/math/tests/test_GpuProfiler - I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler + > ./paddle/legacy/math/tests/test_GpuProfiler + I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/legacy/math/tests/test_GpuProfiler I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done. [==========] Running 1 test from 1 test case. @@ -130,7 +130,7 @@ nvprof 工具 1. 将 :code:`REGISTER_GPU_PROFILER` 函数加到代码中(参考强调部分)。 - .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :emphasize-lines: 6-7 @@ -147,13 +147,13 @@ nvprof 工具 .. code-block:: bash - nvprof ./paddle/math/tests/test_GpuProfiler + nvprof ./paddle/legacy/math/tests/test_GpuProfiler 然后,您就能获得如下的分析结果: .. code-block:: bash - ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler + ==78544== Profiling application: ./paddle/legacy/math/tests/test_GpuProfiler ==78544== Profiling result: Time(%) Time Calls Avg Min Max Name 27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD] diff --git a/doc/v2/howto/optimization/gpu_profiling_en.rst b/doc/v2/howto/optimization/gpu_profiling_en.rst index 50adb7da24..6e439be9bb 100644 --- a/doc/v2/howto/optimization/gpu_profiling_en.rst +++ b/doc/v2/howto/optimization/gpu_profiling_en.rst @@ -51,10 +51,10 @@ For general GPU profiling, a bunch of tools are provided from both NVIDIA and th **nvprof** is Nvidia profiler and **nvvp** is (GUI based) Nvidia visual profiler. In this tutorial, we will focus on nvprof and nvvp. -:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate +:code:`test_GpuProfiler` from :code:`paddle/legacy/math/tests` directory will be used to evaluate above profilers. -.. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp +.. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :linenos: @@ -80,7 +80,7 @@ As a simple example, consider the following: 1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines). - .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :emphasize-lines: 8-12,14 @@ -98,8 +98,8 @@ As a simple example, consider the following: .. code-block:: bash :emphasize-lines: 1,12-15 - > ./paddle/math/tests/test_GpuProfiler - I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/math/tests/test_GpuProfiler + > ./paddle/legacy/math/tests/test_GpuProfiler + I1117 11:13:42.313065 2522362816 Util.cpp:155] commandline: ./paddle/legacy/math/tests/test_GpuProfiler I1117 11:13:42.845065 2522362816 Util.cpp:130] Calling runInitFunctions I1117 11:13:42.845208 2522362816 Util.cpp:143] Call runInitFunctions done. [==========] Running 1 test from 1 test case. @@ -127,7 +127,7 @@ To use this command line profiler **nvprof**, you can simply issue the following 1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines). - .. literalinclude:: ../../../../paddle/math/tests/test_GpuProfiler.cpp + .. literalinclude:: ../../../../paddle/legacy/math/tests/test_GpuProfiler.cpp :language: c++ :lines: 137-151 :emphasize-lines: 6-7 @@ -144,13 +144,13 @@ To use this command line profiler **nvprof**, you can simply issue the following .. code-block:: bash - nvprof ./paddle/math/tests/test_GpuProfiler + nvprof ./paddle/legacy/math/tests/test_GpuProfiler Then, you can get the following profiling result: .. code-block:: bash - ==78544== Profiling application: ./paddle/math/tests/test_GpuProfiler + ==78544== Profiling application: ./paddle/legacy/math/tests/test_GpuProfiler ==78544== Profiling result: Time(%) Time Calls Avg Min Max Name 27.60% 9.6305ms 5 1.9261ms 3.4560us 6.4035ms [CUDA memcpy HtoD] diff --git a/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst b/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst index 67c7b774e9..9d6d417075 100644 --- a/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst +++ b/doc/v2/howto/rnn/hrnn_rnn_api_compare_cn.rst @@ -4,7 +4,7 @@ 单双层RNN API对比介绍 ##################### -本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。 +本文以PaddlePaddle的双层RNN单元测试为示例,用多对效果完全相同的、分别使用单双层RNN作为网络配置的模型,来讲解如何使用双层RNN。本文中所有的例子,都只是介绍双层RNN的API接口,并不是使用双层RNN解决实际的问题。如果想要了解双层RNN在具体问题中的使用,请参考\ :ref:`algo_hrnn_demo`\ 。本文中示例所使用的单元测试文件是\ `test_RecurrentGradientMachine.cpp `_\ 。 示例1:双层RNN,子序列间无Memory ================================ @@ -13,8 +13,8 @@ 在本示例中,单层RNN和双层RNN的网络配置,都是将每一句分好词后的句子,使用LSTM作为encoder,压缩成一个向量。区别是RNN使用两层序列模型,将多句话看成一个整体同时使用encoder压缩。二者语意上完全一致。这组语义相同的示例配置如下: -* 单层RNN\: `sequence_layer_group.conf `_ -* 双层RNN\: `sequence_nest_layer_group.conf `_ +* 单层RNN\: `sequence_layer_group.conf `_ +* 双层RNN\: `sequence_nest_layer_group.conf `_ 读取双层序列数据 @@ -24,18 +24,18 @@ - 本例中的原始数据一共有10个样本。每个样本由两部分组成,一个label(此处都为2)和一个已经分词后的句子。这个数据也被单层RNN网络直接使用。 -.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg :language: text - 双层序列数据一共有4个样本。 每个样本间用空行分开,整体数据和原始数据完全一样。但于双层序列的LSTM来说,第一个样本同时encode两条数据成两个向量。这四条数据同时处理的句子数量为\ :code:`[2, 3, 2, 3]`\ 。 -.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest :language: text -其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\: +其次,对于两种不同的输入数据类型,不同DataProvider对比如下(`sequenceGen.py `_)\: -.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py :language: python :lines: 21-39 :linenos: @@ -47,7 +47,7 @@ - words是原始数据中的每一句话,所对应的词表index数组。它是integer_value_sequence类型的,即整数数组。words即为这个数据中的单层时间序列。 - label是原始数据中对于每一句话的分类标签,它是integer_value类型的。 -.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py :language: python :lines: 42-71 :linenos: @@ -64,7 +64,7 @@ 首先,我们看一下单层RNN的配置。代码中9-15行(高亮部分)即为单层RNN序列的使用代码。这里使用了PaddlePaddle预定义好的RNN处理函数。在这个函数中,RNN对于每一个时间步通过了一个LSTM网络。 -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_layer_group.conf :language: python :lines: 38-63 :linenos: @@ -85,7 +85,7 @@ * 至此,\ :code:`lstm_last`\ 便和单层RNN配置中的\ :code:`lstm_last`\ 具有相同的结果了。 -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_layer_group.conf :language: python :lines: 38-64 :linenos: @@ -107,7 +107,7 @@ - 单层RNN:过了一个很简单的recurrent_group。每一个时间步,当前的输入y和上一个时间步的输出rnn_state做了一个全链接。 -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn.conf :language: python :lines: 36-48 @@ -116,7 +116,7 @@ - 内层inner_step的recurrent_group和单层序列的几乎一样。除了boot_layer=outer_mem,表示将外层的outer_mem作为内层memory的初始状态。外层outer_step中,outer_mem是一个子句的最后一个向量,即整个双层group是将前一个子句的最后一个向量,作为下一个子句memory的初始状态。 - 从输入数据上看,单双层序列的句子是一样的,只是双层序列将其又做了子序列划分。因此双层序列的配置中,必须将前一个子句的最后一个元素,作为boot_layer传给下一个子句的memory,才能保证和单层序列的配置中“每个时间步都用了上一个时间步的输出结果”一致。 -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn.conf :language: python :lines: 39-66 @@ -134,7 +134,7 @@ **输入不等长** 是指recurrent_group的多个输入序列,在每个时间步的子序列长度可以不相等。但序列输出时,需要指定与某一个输入的序列信息是一致的。使用\ :red:`targetInlink`\ 可以指定哪一个输入和输出序列信息一致,默认指定第一个输入。 -示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。 +示例3的配置分别为\ `单层不等长RNN `_\ 和\ `双层不等长RNN `_\ 。 示例3对于单层RNN和双层RNN数据完全相同。 @@ -152,14 +152,14 @@ * 单层RNN\: -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py :language: python :lines: 42-59 :linenos: * 双层RNN\ \: -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py :language: python :lines: 41-80 :linenos: diff --git a/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst b/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst index ae997f0805..a4485f7b5e 100644 --- a/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst +++ b/doc/v2/howto/rnn/hrnn_rnn_api_compare_en.rst @@ -4,7 +4,7 @@ API comparision between RNN and hierarchical RNN ##################### -This article takes PaddlePaddle's hierarchical RNN unit test as an example. We will use several examples to illestrate the usage of single-layer and hierarchical RNNs. Each example has two model configurations, one for single-layer, and the other for hierarchical RNN. Although the implementations are different, both the two model configurations' effects are the same. All of the examples in this article only describe the API interface of the hierarchical RNN, while we do not use this hierarchical RNN to solve practical problems. If you want to understand the use of hierarchical RNN in specific issues, please refer to \ :ref:`algo_hrnn_demo`\ 。The unit test file used in this article's example is \ `test_RecurrentGradientMachine.cpp `_\ 。 +This article takes PaddlePaddle's hierarchical RNN unit test as an example. We will use several examples to illestrate the usage of single-layer and hierarchical RNNs. Each example has two model configurations, one for single-layer, and the other for hierarchical RNN. Although the implementations are different, both the two model configurations' effects are the same. All of the examples in this article only describe the API interface of the hierarchical RNN, while we do not use this hierarchical RNN to solve practical problems. If you want to understand the use of hierarchical RNN in specific issues, please refer to \ :ref:`algo_hrnn_demo`\ 。The unit test file used in this article's example is \ `test_RecurrentGradientMachine.cpp `_\ 。 Example 1:Hierarchical RNN without Memory between subsequences ================================ @@ -13,8 +13,8 @@ The classical case in the hierarchical RNN is to perform sequence operations on In this example, the network configuration of single-layer RNNs and hierarchical RNNs are all to use LSTM as en encoder to compress a word-segmented sentence into a vector. The difference is that, RNN uses a hierarchical RNN model, treating multiple sentences as a whole to use encoder to compress simultaneously. They are completely consistent in their semantic meanings. This pair of semantically identical example configurations is as follows: -* RNN\: `sequence_layer_group.conf `_ -* Hierarchical RNN\: `sequence_nest_layer_group.conf `_ +* RNN\: `sequence_layer_group.conf `_ +* Hierarchical RNN\: `sequence_nest_layer_group.conf `_ Reading hierarchical sequence data @@ -24,18 +24,18 @@ Firstly, the original data in this example is as follows \: - The original data in this example has 10 samples. Each of the sample includes two components: a lable(all 2 here), and a word-segmented sentence. This data is used by single RNN as well. -.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg :language: text - The data for hierarchical RNN has 4 samples. Every sample is seperated by a blank line, while the content of the data is the same as the original data. But as for hierarchical LSTM, the first sample will encode two sentences into two vectors simultaneously. The sentence count dealed simultaneously by this 4 samples are \ :code:`[2, 3, 2, 3]`\ . -.. literalinclude:: ../../../../paddle/gserver/tests/Sequence/tour_train_wdseg.nest +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest :language: text -Secondly, as for these two types of different input data formats, the contrast of different DataProviders are as follows (`sequenceGen.py `_)\: +Secondly, as for these two types of different input data formats, the contrast of different DataProviders are as follows (`sequenceGen.py `_)\: -.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py :language: python :lines: 21-39 :linenos: @@ -47,7 +47,7 @@ Secondly, as for these two types of different input data formats, the contrast o - "words" is a list of word table indices corresponding to each word in the sentence in the original data. Its data type is integer_value_sequence, that is integer list. So, "words" is a singler-layer time series in the data. - "label" is the categorical label of each sentence, whose data type is integer_value. -.. literalinclude:: ../../../../paddle/gserver/tests/sequenceGen.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequenceGen.py :language: python :lines: 42-71 :linenos: @@ -64,7 +64,7 @@ Model configuration Firstly, let's look at the configuration of single-layer RNN. The hightlighted part of line 9 to line 15 is the usage of single-layer RNN. Here we use the pre-defined RNN process function in PaddlePaddle. In this function, for each time step, RNN passes through an LSTM network. -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_layer_group.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_layer_group.conf :language: python :lines: 38-63 :linenos: @@ -85,7 +85,7 @@ Secondly, let's look at the model configuration of hierarchical RNN which has th * Till now, \ :code:`lstm_last`\ has the same result as \ :code:`lstm_last`\ in single-layer RNN configuration. -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_layer_group.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_layer_group.conf :language: python :lines: 38-64 :linenos: @@ -107,7 +107,7 @@ We select the different parts between single-layer RNN and hierarchical RNN conf - single-layer RNN:passes through a simple recurrent_group. For each time step, the current input y and the last time step's output rnn_state pass through a fully-connected layer. -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn.conf :language: python :lines: 36-48 @@ -116,7 +116,7 @@ We select the different parts between single-layer RNN and hierarchical RNN conf - The recurrent_group of inner layer's inner_step is nearly the same as single-layer sequence, except for the case of boot_layer=outer_mem, which means using the outer layer's outer_mem as the initial state for the inner layer's memory. In the outer layer's out_step, outer_mem is the last vector of a subsequence, that is, the whole hierarchical group uses the last vector of the previous subsequence as the initial state for the next subsequence's memory. - From the aspect of the input data, sentences from single-layer and hierarchical RNN are the same. The only difference is that, hierarchical RNN disassembes the sequence into subsequences. So in the hierarchical RNN configuration, we must use the last element of the previous subsequence as a boot_layer for the memory of the next subsequence, so that it makes no difference with "every time step uses the output of last time step" in the sigle-layer RNN configuration. -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn.conf +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn.conf :language: python :lines: 39-66 @@ -134,7 +134,7 @@ Example 3:hierarchical RNN with unequal length inputs **unequal length inputs** means in the multiple input sequences of recurrent_group, the lengths of subsequences can be unequal. But the output of the sequence, needs to be consistent with one of the input sequences. Using \ :red:`targetInlink`\ can help you specify which of the input sequences and the output sequence can be consistent, by default is the first input. -The configurations of Example 3 are \ `sequence_rnn_multi_unequalength_inputs `_ \ and \ `sequence_nest_rnn_multi_unequalength_inputs `_\ . +The configurations of Example 3 are \ `sequence_rnn_multi_unequalength_inputs `_ \ and \ `sequence_nest_rnn_multi_unequalength_inputs `_\ . The data for the configurations of Example 3's single-layer RNN and hierarchical RNN are exactly the same. @@ -152,14 +152,14 @@ Similar to Example 2's configuration, Example 3's configuration uses single-laye * single-layer RNN\: -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py :language: python :lines: 42-59 :linenos: * hierarchical RNN\ \: -.. literalinclude:: ../../../../paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py +.. literalinclude:: ../../../../paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py :language: python :lines: 41-80 :linenos: diff --git a/go/pserver/optimizer.go b/go/pserver/optimizer.go index f17577997b..eba0c47e19 100644 --- a/go/pserver/optimizer.go +++ b/go/pserver/optimizer.go @@ -16,7 +16,7 @@ package pserver // #cgo CFLAGS: -I ../../ // #cgo LDFLAGS: ${SRCDIR}/client/c/libpaddle_go_optimizer.a -lstdc++ -lm -// #include "paddle/optimizer/optimizer.h" +// #include "paddle/legacy/optimizer/optimizer.h" // #include // #include import "C" diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt index d722eec189..6653244507 100644 --- a/paddle/CMakeLists.txt +++ b/paddle/CMakeLists.txt @@ -1,24 +1,24 @@ if(NOT WITH_FLUID_ONLY) - add_subdirectory(cuda) - add_subdirectory(function) - add_subdirectory(utils) - add_subdirectory(math) - add_subdirectory(gserver) - add_subdirectory(parameter) + add_subdirectory(legacy/cuda) + add_subdirectory(legacy/function) + add_subdirectory(legacy/utils) + add_subdirectory(legacy/math) + add_subdirectory(legacy/gserver) + add_subdirectory(legacy/parameter) if(MOBILE_INFERENCE) - add_subdirectory(capi) + add_subdirectory(legacy/capi) else() - add_subdirectory(pserver) - add_subdirectory(trainer) + add_subdirectory(legacy/pserver) + add_subdirectory(legacy/trainer) add_subdirectory(scripts) if(WITH_C_API) - add_subdirectory(capi) + add_subdirectory(legacy/capi) endif() if(WITH_SWIG_PY) - add_subdirectory(api) + add_subdirectory(legacy/api) endif() endif() endif() diff --git a/paddle/contrib/CMakeLists.txt b/paddle/contrib/CMakeLists.txt index 70e3a0583d..4b19256ef4 100644 --- a/paddle/contrib/CMakeLists.txt +++ b/paddle/contrib/CMakeLists.txt @@ -14,4 +14,3 @@ # add_subdirectory(inference) -add_subdirectory(tape) diff --git a/paddle/contrib/inference/CMakeLists.txt b/paddle/contrib/inference/CMakeLists.txt index 0f56d648b1..c30eff5010 100644 --- a/paddle/contrib/inference/CMakeLists.txt +++ b/paddle/contrib/inference/CMakeLists.txt @@ -19,6 +19,9 @@ endif(APPLE) set(inference_deps paddle_inference_api paddle_fluid_api) +if(WITH_GPU AND TENSORRT_FOUND) + set(inference_deps ${inference_deps} paddle_inference_tensorrt_subgraph_engine) +endif() function(inference_api_test TARGET_NAME) if (WITH_TESTING) @@ -43,6 +46,15 @@ cc_library(paddle_inference_api SRCS paddle_inference_api.cc paddle_inference_api_impl.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB}) +# Here the shared library doesn't depend on other fluid libraries, or double free will occur. +cc_library(paddle_inference_api_shared SHARED + SRCS paddle_inference_api.cc paddle_inference_api_impl.cc) +set_target_properties(paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api) +if(NOT APPLE) + set(LINK_FLAGS "-fPIC -fvisibility=hidden") + set_target_properties(paddle_inference_api_shared PROPERTIES LINK_FLAGS "${LINK_FLAGS}") +endif() + cc_test(test_paddle_inference_api SRCS test_paddle_inference_api.cc DEPS paddle_inference_api) @@ -50,17 +62,30 @@ cc_test(test_paddle_inference_api inference_api_test(test_paddle_inference_api_impl ARGS test_word2vec test_image_classification) -if (WITH_ANAKIN AND WITH_TESTING) # only needed in CI +if(WITH_GPU AND TENSORRT_FOUND) +cc_library(paddle_inference_tensorrt_subgraph_engine + SRCS paddle_inference_api_tensorrt_subgraph_engine.cc + DEPS paddle_inference_api analysis tensorrt_engine paddle_inference_api paddle_fluid_api) + +inference_api_test(test_paddle_inference_api_tensorrt_subgraph_engine ARGS test_word2vec) +endif() + +if (WITH_ANAKIN) # only needed in CI # Due to Anakin do not have official library releases and the versions of protobuf and cuda do not match Paddle's, # so anakin library will not be merged to our official inference library. To use anakin prediction API, one need to # compile the libinference_anakin_api.a and compile with anakin.so. - nv_library(inference_anakin_api SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc) + nv_library(inference_anakin_api SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc) + nv_library(inference_anakin_api_shared SHARED SRCS paddle_inference_api.cc paddle_inference_api_anakin_engine.cc) target_compile_options(inference_anakin_api BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) + target_compile_options(inference_anakin_api_shared BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) target_link_libraries(inference_anakin_api anakin anakin_saber_common) - cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc + target_link_libraries(inference_anakin_api_shared anakin anakin_saber_common) + if (WITH_TESTING) + cc_test(inference_anakin_test SRCS paddle_inference_api_anakin_engine_tester.cc ARGS --model=${ANAKIN_INSTALL_DIR}/mobilenet_v2.anakin.bin DEPS inference_anakin_api) - target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) + target_compile_options(inference_anakin_test BEFORE PUBLIC ${ANAKIN_COMPILE_EXTRA_FLAGS}) + endif(WITH_TESTING) endif() if(WITH_TESTING) diff --git a/paddle/contrib/inference/demo/CMakeLists.txt b/paddle/contrib/inference/demo/CMakeLists.txt index 7b0fa77ad1..ecece6fe34 100644 --- a/paddle/contrib/inference/demo/CMakeLists.txt +++ b/paddle/contrib/inference/demo/CMakeLists.txt @@ -14,3 +14,48 @@ # inference_api_test(simple_on_word2vec ARGS test_word2vec) + +option(WITH_INFERENCE_DEMO "Compile with Inference demo" OFF) +if(NOT WITH_INFERENCE_DEMO) + return() +endif() + +set(DEMO_INSTALL_DIR "${PADDLE_BINARY_DIR}/inference_demo") +set(URL_ROOT http://paddlemodels.bj.bcebos.com/inference-vis-demos%2F) + +function(inference_download_test_demo TARGET) + if (NOT WITH_TESTING) + return() + endif() + set(options "") + set(oneValueArgs URL) + set(multiValueArgs SRCS) + cmake_parse_arguments(tests "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + set(test_dir "${DEMO_INSTALL_DIR}/${TARGET}") + message(STATUS "inference demo ${test_dir}") + + if(NOT EXISTS "${test_dir}") + message(STATUS "Download ${TARGET} model from ${tests_URL}") + execute_process(COMMAND bash -c "mkdir -p ${test_dir}") + execute_process(COMMAND bash -c "cd ${test_dir}; wget -q ${tests_URL}") + execute_process(COMMAND bash -c "cd ${test_dir}; tar xzf *.tar.gz") + endif() + + cc_test(${TARGET} SRCS "${tests_SRCS}" + DEPS paddle_inference_api paddle_fluid + ARGS --data=${test_dir}/data.txt + --modeldir=${test_dir}/model + --refer=${test_dir}/result.txt) +endfunction() + +# disable mobilenet test +#inference_download_test_demo(mobilenet_inference_demo +# SRCS vis_demo.cc +# URL ${URL_ROOT}mobilenet.tar.gz) +inference_download_test_demo(se_resnext50_inference_demo + SRCS vis_demo.cc + URL ${URL_ROOT}se_resnext50.tar.gz) +inference_download_test_demo(ocr_inference_demo + SRCS vis_demo.cc + URL ${URL_ROOT}ocr.tar.gz) diff --git a/paddle/contrib/inference/demo/README.md b/paddle/contrib/inference/demo/README.md new file mode 100644 index 0000000000..f1d2566602 --- /dev/null +++ b/paddle/contrib/inference/demo/README.md @@ -0,0 +1,36 @@ +# Infernce Demos + +Input data format: + +- Each line contains a single record +- Each record's format is + +``` +\t +``` + +Follow the C++ codes in `vis_demo.cc`. + +## MobileNet + +To execute the demo, simply run + +```sh +./mobilenet_inference_demo --modeldir --data +``` + +## SE-ResNeXt-50 + +To execute the demo, simply run + +```sh +./se_resnext50_inference_demo --modeldir --data +``` + +## OCR + +To execute the demo, simply run + +```sh +./ocr_inference_demo --modeldir --data +``` diff --git a/paddle/contrib/inference/demo/simple_on_word2vec.cc b/paddle/contrib/inference/demo/simple_on_word2vec.cc index 192a641426..c253014642 100644 --- a/paddle/contrib/inference/demo/simple_on_word2vec.cc +++ b/paddle/contrib/inference/demo/simple_on_word2vec.cc @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include "paddle/contrib/inference/paddle_inference_api.h" + namespace paddle { namespace demo { @@ -40,10 +41,9 @@ void Main(bool use_gpu) { //# 2. Prepare input. int64_t data[4] = {1, 2, 3, 4}; - PaddleBuf buf{.data = data, .length = sizeof(data)}; PaddleTensor tensor{.name = "", .shape = std::vector({4, 1}), - .data = buf, + .data = PaddleBuf(data, sizeof(data)), .dtype = PaddleDType::INT64}; // For simplicity, we set all the slots with the same data. @@ -55,14 +55,12 @@ void Main(bool use_gpu) { //# 4. Get output. ASSERT_EQ(outputs.size(), 1UL); - LOG(INFO) << "output buffer size: " << outputs.front().data.length; - const size_t num_elements = outputs.front().data.length / sizeof(float); + LOG(INFO) << "output buffer size: " << outputs.front().data.length(); + const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(5UL, num_elements); i++) { - LOG(INFO) << static_cast(outputs.front().data.data)[i]; + LOG(INFO) << static_cast(outputs.front().data.data())[i]; } - // TODO(Superjomn): this is should be free automatically - free(outputs[0].data.data); } } @@ -86,10 +84,9 @@ void MainThreads(int num_threads, bool use_gpu) { for (int batch_id = 0; batch_id < num_batches; ++batch_id) { // 2. Dummy Input Data int64_t data[4] = {1, 2, 3, 4}; - PaddleBuf buf{.data = data, .length = sizeof(data)}; PaddleTensor tensor{.name = "", .shape = std::vector({4, 1}), - .data = buf, + .data = PaddleBuf(data, sizeof(data)), .dtype = PaddleDType::INT64}; std::vector inputs(4, tensor); std::vector outputs; @@ -99,13 +96,13 @@ void MainThreads(int num_threads, bool use_gpu) { // 4. Get output. ASSERT_EQ(outputs.size(), 1UL); LOG(INFO) << "TID: " << tid << ", " - << "output buffer size: " << outputs.front().data.length; - const size_t num_elements = outputs.front().data.length / sizeof(float); + << "output buffer size: " << outputs.front().data.length(); + const size_t num_elements = + outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. for (size_t i = 0; i < std::min(5UL, num_elements); i++) { - LOG(INFO) << static_cast(outputs.front().data.data)[i]; + LOG(INFO) << static_cast(outputs.front().data.data())[i]; } - free(outputs[0].data.data); } }); } diff --git a/paddle/contrib/inference/demo/utils.h b/paddle/contrib/inference/demo/utils.h new file mode 100644 index 0000000000..b5330d8d9d --- /dev/null +++ b/paddle/contrib/inference/demo/utils.h @@ -0,0 +1,68 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include + +#include "paddle/contrib/inference/paddle_inference_api.h" + +namespace paddle { +namespace demo { + +static void split(const std::string& str, + char sep, + std::vector* pieces) { + pieces->clear(); + if (str.empty()) { + return; + } + size_t pos = 0; + size_t next = str.find(sep, pos); + while (next != std::string::npos) { + pieces->push_back(str.substr(pos, next - pos)); + pos = next + 1; + next = str.find(sep, pos); + } + if (!str.substr(pos).empty()) { + pieces->push_back(str.substr(pos)); + } +} + +/* + * Get a summary of a PaddleTensor content. + */ +static std::string SummaryTensor(const PaddleTensor& tensor) { + std::stringstream ss; + int num_elems = tensor.data.length() / PaddleDtypeSize(tensor.dtype); + + ss << "data[:10]\t"; + switch (tensor.dtype) { + case PaddleDType::INT64: { + for (int i = 0; i < std::min(num_elems, 10); i++) { + ss << static_cast(tensor.data.data())[i] << " "; + } + break; + } + case PaddleDType::FLOAT32: + for (int i = 0; i < std::min(num_elems, 10); i++) { + ss << static_cast(tensor.data.data())[i] << " "; + } + break; + } + return ss.str(); +} + +} // namespace demo +} // namespace paddle diff --git a/paddle/contrib/inference/demo/vis_demo.cc b/paddle/contrib/inference/demo/vis_demo.cc new file mode 100644 index 0000000000..45575f9a86 --- /dev/null +++ b/paddle/contrib/inference/demo/vis_demo.cc @@ -0,0 +1,149 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file contains demo for mobilenet, se-resnext50 and ocr. + */ + +#include +#include // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files. +#include +#include +#include +#include "paddle/contrib/inference/demo/utils.h" +#include "paddle/contrib/inference/paddle_inference_api.h" + +#ifdef PADDLE_WITH_CUDA +DECLARE_double(fraction_of_gpu_memory_to_use); +#endif + +namespace paddle { +namespace demo { + +DEFINE_string(modeldir, "", "Directory of the inference model."); +DEFINE_string(refer, "", "path to reference result for comparison."); +DEFINE_string( + data, + "", + "path of data; each line is a record, format is " + "'\t data; + std::vector shape; +}; + +void split(const std::string& str, char sep, std::vector* pieces); + +Record ProcessALine(const std::string& line) { + LOG(INFO) << "process a line"; + std::vector columns; + split(line, '\t', &columns); + CHECK_EQ(columns.size(), 2UL) + << "data format error, should be \t"; + + Record record; + std::vector data_strs; + split(columns[0], ' ', &data_strs); + for (auto& d : data_strs) { + record.data.push_back(std::stof(d)); + } + + std::vector shape_strs; + split(columns[1], ' ', &shape_strs); + for (auto& s : shape_strs) { + record.shape.push_back(std::stoi(s)); + } + LOG(INFO) << "data size " << record.data.size(); + LOG(INFO) << "data shape size " << record.shape.size(); + return record; +} + +void CheckOutput(const std::string& referfile, const PaddleTensor& output) { + std::string line; + std::ifstream file(referfile); + std::getline(file, line); + auto refer = ProcessALine(line); + file.close(); + + size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); + LOG(INFO) << "predictor output numel " << numel; + LOG(INFO) << "reference output numel " << refer.data.size(); + EXPECT_EQ(numel, refer.data.size()); + switch (output.dtype) { + case PaddleDType::INT64: { + for (size_t i = 0; i < numel; ++i) { + EXPECT_EQ(static_cast(output.data.data())[i], refer.data[i]); + } + break; + } + case PaddleDType::FLOAT32: + for (size_t i = 0; i < numel; ++i) { + EXPECT_NEAR( + static_cast(output.data.data())[i], refer.data[i], 1e-5); + } + break; + } +} + +/* + * Use the native fluid engine to inference the demo. + */ +void Main(bool use_gpu) { + NativeConfig config; + config.param_file = FLAGS_modeldir + "/__params__"; + config.prog_file = FLAGS_modeldir + "/__model__"; + config.use_gpu = use_gpu; + config.device = 0; +#ifdef PADDLE_WITH_CUDA + config.fraction_of_gpu_memory = FLAGS_fraction_of_gpu_memory_to_use; +#endif + + LOG(INFO) << "init predictor"; + auto predictor = + CreatePaddlePredictor(config); + + LOG(INFO) << "begin to process data"; + // Just a single batch of data. + std::string line; + std::ifstream file(FLAGS_data); + std::getline(file, line); + auto record = ProcessALine(line); + file.close(); + + // Inference. + PaddleTensor input{ + .name = "xx", + .shape = record.shape, + .data = PaddleBuf(record.data.data(), record.data.size() * sizeof(float)), + .dtype = PaddleDType::FLOAT32}; + + LOG(INFO) << "run executor"; + std::vector output; + predictor->Run({input}, &output); + + LOG(INFO) << "output.size " << output.size(); + auto& tensor = output.front(); + LOG(INFO) << "output: " << SummaryTensor(tensor); + + // compare with reference result + CheckOutput(FLAGS_refer, tensor); +} + +TEST(demo, vis_demo_cpu) { Main(false /*use_gpu*/); } +#ifdef PADDLE_WITH_CUDA +TEST(demo, vis_demo_gpu) { Main(true /*use_gpu*/); } +#endif +} // namespace demo +} // namespace paddle diff --git a/paddle/contrib/inference/high_level_api.md b/paddle/contrib/inference/high_level_api.md new file mode 100644 index 0000000000..eb92885052 --- /dev/null +++ b/paddle/contrib/inference/high_level_api.md @@ -0,0 +1,60 @@ +# Inference High-level APIs +This document describes the high-level inference APIs, one can use them to deploy a Paddle model for an application quickly. + +The APIs are described in `paddle_inference_api.h`, just one header file, and two libaries `libpaddle_fluid.so` and `libpaddle_fluid_api.so` are needed for a deployment. + +## PaddleTensor +We provide the `PaddleTensor` data structure to give a general tensor interface. + +The definition is + +```c++ +struct PaddleTensor { + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; +}; +``` + +The data is stored in a continuous memory `PaddleBuf,` and a `PaddleDType` specifies tensor's data type. +The `name` field is used to specify the name of an input variable, +that is important when there are multiple inputs and need to distinguish which variable to set. + +## engine +The inference APIs has two different underlying engines + +- the native engine, which is consists of the native operators and framework, +- the Anakin engine, which has an Anakin library embedded. + +The native engine takes a native Paddle model as input, and supports any model that trained by Paddle, +the Anakin engine is faster for some model, +but it can only take the Anakin model as input(user need to transform the format first manually) and currently not all Paddle models are supported. + +```c++ +enum class PaddleEngineKind { + kNative = 0, // Use the native Fluid facility. + kAnakin, // Use Anakin for inference. +}; +``` + +## PaddlePredictor and how to create one +The main interface is `PaddlePredictor,` there are following methods + +- `bool Run(const std::vector& inputs, std::vector* output_data)` + - take inputs and output `output_data.` +- `Clone` to clone a predictor from an existing one, with model parameter shared. + +There is a factory method to help create a predictor, and the user takes the ownership of this object. + +```c++ +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); +``` + +By specifying the engine kind and config, one can get a specific implementation. + +## Reference + +- [paddle_inference_api.h](./paddle_inference_api.h) +- [some demos](./demo) diff --git a/paddle/contrib/inference/high_level_api_cn.md b/paddle/contrib/inference/high_level_api_cn.md new file mode 100644 index 0000000000..a57f015a4e --- /dev/null +++ b/paddle/contrib/inference/high_level_api_cn.md @@ -0,0 +1,87 @@ +# Paddle 预测 API + +为了更简单方便的预测部署,Fluid 提供了一套高层 API 用来隐藏底层不同的优化实现。 + +预测库包含: + +- 头文件 `paddle_inference_api.h` 定义了所有的接口 +- 库文件`libpaddle_fluid.so` 或 `libpaddle_fluid.a` +- 库文件 `libpaddle_inference_api.so` 或 `libpaddle_inference_api.a` + +下面是详细的一些 API 概念介绍 + +## PaddleTensor + +PaddleTensor 定义了预测最基本的输入输出的数据格式,其定义是 + +```c++ +struct PaddleTensor { + std::string name; // variable name. + std::vector shape; + PaddleBuf data; // blob of data. + PaddleDType dtype; +}; +``` + +- `name` 用于指定输入数据对应的 模型中variable 的名字 (暂时没有用,但会在后续支持任意 target 时启用) +- `shape` 表示一个 Tensor 的 shape +- `data` 数据以连续内存的方式存储在`PaddleBuf` 中,`PaddleBuf` 可以接收外面的数据或者独立`malloc`内存,详细可以参考头文件中相关定义。 +- `dtype` 表示 Tensor 的数据类型 + +## engine + +高层 API 底层有多种优化实现,我们称之为 engine,目前有三种 engine + +- 原生 engine,由 paddle 原生的 forward operator 组成,可以天然支持所有paddle 训练出的模型, +- Anakin engine,封装了 [Anakin](https://github.com/PaddlePaddle/Anakin) ,在某些模型上性能不错,但只能接受自带模型格式,无法支持所有 paddle 模型, +- TensorRT mixed engine,用子图的方式支持了 [TensorRT](https://developer.nvidia.com/tensorrt) ,支持所有paddle 模型,并自动切割部分计算子图到 TensorRT 上加速(WIP) + +其实现为 + +```c++ +enum class PaddleEngineKind { + kNative = 0, // Use the native Fluid facility. + kAnakin, // Use Anakin for inference. + kAutoMixedTensorRT // Automatically mixing TensorRT with the Fluid ops. +}; +``` + +## 预测部署过程 + +总体上分为以下步骤 + +1. 用合适的配置创建 `PaddlePredictor` +2. 创建输入用的 `PaddleTensor`,传入到 `PaddlePredictor` 中 +3. 获取输出的 `PaddleTensor` ,将结果取出 + +下面完整演示一个简单的模型,部分细节代码隐去 + +```c++ +#include "paddle_inference_api.h" + +// 创建一个 config,并修改相关设置 +paddle::NativeConfig config; +config.model_dir = "xxx"; +config.use_gpu = false; +// 创建一个原生的 PaddlePredictor +auto predictor = + paddle::CreatePaddlePredictor(config); +// 创建输入 tensor +int64_t data[4] = {1, 2, 3, 4}; +paddle::PaddleTensor tensor{.name = "", + .shape = std::vector({4, 1}), + .data = PaddleBuf(data, sizeof(data)), + .dtype = PaddleDType::INT64}; +// 创建输出 tensor,输出 tensor 的内存可以复用 +std::vector outputs; +// 执行预测 +CHECK(predictor->Run(slots, &outputs)); +// 获取 outputs ... +``` + +编译时,联编 `libpaddle_fluid.a/.so` 和 `libpaddle_inference_api.a/.so` 便可。 + +## 详细代码参考 + +- [inference demos](./demo) +- [复杂单线程/多线程例子](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/contrib/inference/test_paddle_inference_api_impl.cc) diff --git a/paddle/contrib/inference/paddle_inference_api.cc b/paddle/contrib/inference/paddle_inference_api.cc index d67e1e7667..4fe198ad7d 100644 --- a/paddle/contrib/inference/paddle_inference_api.cc +++ b/paddle/contrib/inference/paddle_inference_api.cc @@ -13,3 +13,65 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/contrib/inference/paddle_inference_api.h" + +namespace paddle { + +int PaddleDtypeSize(PaddleDType dtype) { + switch (dtype) { + case PaddleDType::FLOAT32: + return sizeof(float); + case PaddleDType::INT64: + return sizeof(int64_t); + default: + assert(false); + return -1; + } +} + +PaddleBuf::PaddleBuf(PaddleBuf&& other) + : data_(other.data_), + length_(other.length_), + memory_owned_(other.memory_owned_) { + other.memory_owned_ = false; + other.data_ = nullptr; + other.length_ = 0; +} + +PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; } + +PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) { + // only the buffer with external memory can be copied + assert(!other.memory_owned_); + data_ = other.data_; + length_ = other.length_; + memory_owned_ = other.memory_owned_; + return *this; +} + +void PaddleBuf::Resize(size_t length) { + // Only the owned memory can be reset, the external memory can't be changed. + if (length_ == length) return; + assert(memory_owned_); + Free(); + data_ = new char[length]; + length_ = length; + memory_owned_ = true; +} + +void PaddleBuf::Reset(void* data, size_t length) { + Free(); + memory_owned_ = false; + data_ = data; + length_ = length; +} + +void PaddleBuf::Free() { + if (memory_owned_ && data_) { + assert(length_ > 0); + delete static_cast(data_); + data_ = nullptr; + length_ = 0; + } +} + +} // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api.h b/paddle/contrib/inference/paddle_inference_api.h index 77e2d77b6b..b8ba2d14a5 100644 --- a/paddle/contrib/inference/paddle_inference_api.h +++ b/paddle/contrib/inference/paddle_inference_api.h @@ -15,12 +15,13 @@ limitations under the License. */ /* * This file contains the definition of a simple Inference API for Paddle. * - * ATTENTION: It requires some C++ features, for lower version C++ or C, we + * ATTENTION: It requires some C++11 features, for lower version C++ or C, we * might release another API. */ #pragma once +#include #include #include #include @@ -32,12 +33,38 @@ enum PaddleDType { INT64, }; -struct PaddleBuf { - void* data; // pointer to the data memory. - size_t length; // number of memory bytes. +class PaddleBuf { + public: + PaddleBuf() = default; + PaddleBuf(PaddleBuf&& other); + // Copy only available when memory is managed externally. + explicit PaddleBuf(const PaddleBuf&); + PaddleBuf& operator=(const PaddleBuf&); + // Do not own the memory. + PaddleBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_{false} {} + // Own memory. + PaddleBuf(size_t length) + : data_(new char[length]), length_(length), memory_owned_(true) {} + // Resize to `length` bytes. + void Resize(size_t length); + // Reset to external memory. + void Reset(void* data, size_t length); + bool empty() const { return length_ == 0; } + void* data() const { return data_; } + size_t length() const { return length_; } + + ~PaddleBuf() { Free(); } + + private: + void Free(); + void* data_{nullptr}; // pointer to the data memory. + size_t length_{0}; // number of memory bytes. + bool memory_owned_{true}; }; struct PaddleTensor { + PaddleTensor() = default; std::string name; // variable name. std::vector shape; // TODO(Superjomn) for LoD support, add a vector> field if needed. @@ -46,12 +73,12 @@ struct PaddleTensor { }; enum class PaddleEngineKind { - kNative = 0, // Use the native Fluid facility. - kAnakin, // Use Anakin for inference. + kNative = 0, // Use the native Fluid facility. + kAnakin, // Use Anakin for inference. + kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. // TODO(Superjomn) support following engines latter. // kTensorRT, // Use TensorRT for inference. // kAutoMixedAnakin, // Automatically mix Fluid with Anakin. - // kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. }; /* @@ -67,8 +94,9 @@ class PaddlePredictor { // Predict an record. // The caller should be responsible for allocating and releasing the memory of - // `inputs`. `inputs` should be alive until Run returns. caller should be - // responsible for releasing the memory of `output_data`. + // `inputs`. `inputs` should be available until Run returns. Caller should be + // responsible for the output tensor's buffer, either allocated or passed from + // outside. virtual bool Run(const std::vector& inputs, std::vector* output_data) = 0; @@ -81,8 +109,7 @@ class PaddlePredictor { // The common configs for all the predictors. struct Config { - std::string model_dir; // path to the model directory. - bool enable_engine{false}; // Enable to execute (part of) the model on + std::string model_dir; // path to the model directory. }; }; @@ -103,6 +130,11 @@ struct AnakinConfig : public PaddlePredictor::Config { int max_batch_size{-1}; }; +struct TensorRTConfig : public NativeConfig { + // Determine whether a subgraph will be executed by TRT. + int min_subgraph_size{1}; +}; + // A factory to help create different predictors. // // FOR EXTENSION DEVELOPER: @@ -113,4 +145,7 @@ struct AnakinConfig : public PaddlePredictor::Config { // Similarly, each engine kind should map to a unique predictor implementation. template std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +int PaddleDtypeSize(PaddleDType dtype); + } // namespace paddle diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc index 5bafc58fa5..ba2d303147 100644 --- a/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc +++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine.cc @@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run( auto d_tensor_in_p = executor_.get_in(input.name); float *d_data_p = d_tensor_in_p->mutable_data(); if (cudaMemcpy(d_data_p, - static_cast(input.data.data), + static_cast(input.data.data()), d_tensor_in_p->valid_size() * sizeof(float), cudaMemcpyHostToDevice) != 0) { LOG(ERROR) << "copy data from CPU to GPU error"; @@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run( for (auto &output : *output_data) { auto *tensor = executor_.get_out(output.name); output.shape = tensor->shape(); + if (output.data.length() < tensor->valid_size() * sizeof(float)) { + output.data.Resize(tensor->valid_size() * sizeof(float)); + } // Copy data from GPU -> CPU - if (cudaMemcpy(output.data.data, + if (cudaMemcpy(output.data.data(), tensor->mutable_data(), tensor->valid_size() * sizeof(float), cudaMemcpyDeviceToHost) != 0) { diff --git a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc index 1d41a5c73e..f92e9d4190 100644 --- a/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc +++ b/paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc @@ -37,28 +37,26 @@ TEST(inference, anakin) { float data[1 * 3 * 224 * 224] = {1.0f}; - PaddleBuf buf{.data = data, .length = sizeof(data)}; PaddleTensor tensor{.name = "input_0", .shape = std::vector({1, 3, 224, 224}), - .data = buf, + .data = PaddleBuf(data, sizeof(data)), .dtype = PaddleDType::FLOAT32}; // For simplicity, we set all the slots with the same data. - std::vector paddle_tensor_feeds(1, tensor); + std::vector paddle_tensor_feeds; + paddle_tensor_feeds.emplace_back(std::move(tensor)); - float data_out[1000]; - - PaddleBuf buf_out{.data = data_out, .length = sizeof(data)}; PaddleTensor tensor_out{.name = "prob_out", .shape = std::vector({1000, 1}), - .data = buf_out, + .data = PaddleBuf(), .dtype = PaddleDType::FLOAT32}; - std::vector outputs(1, tensor_out); + std::vector outputs; + outputs.emplace_back(std::move(tensor_out)); ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); - float* data_o = static_cast(outputs[0].data.data); + float* data_o = static_cast(outputs[0].data.data()); for (size_t j = 0; j < 1000; ++j) { LOG(INFO) << "output[" << j << "]: " << data_o[j]; } diff --git a/paddle/contrib/inference/paddle_inference_api_impl.cc b/paddle/contrib/inference/paddle_inference_api_impl.cc index bda2981a14..b1e5b87598 100644 --- a/paddle/contrib/inference/paddle_inference_api_impl.cc +++ b/paddle/contrib/inference/paddle_inference_api_impl.cc @@ -89,6 +89,7 @@ bool NativePaddlePredictor::Init( LOG(ERROR) << "fail to load inference model."; return false; } + ctx_ = executor_->Prepare(*inference_program_, 0); executor_->CreateVariables( *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); @@ -119,6 +120,7 @@ bool NativePaddlePredictor::Run(const std::vector &inputs, return false; } for (size_t i = 0; i < feed_target_names_.size(); ++i) { + VLOG(4) << "setting " << i << "-th target"; feed_targets[feed_target_names_[i]] = &feeds[i]; } // get fetch variable @@ -130,14 +132,16 @@ bool NativePaddlePredictor::Run(const std::vector &inputs, } // Run the inference program // if share variables, we need not create variables + VLOG(4) << "Run prepared context"; executor_->RunPreparedContext( ctx_.get(), sub_scope_ != nullptr ? sub_scope_ : scope_.get(), &feed_targets, &fetch_targets, false /* don't create variable eatch time */); + VLOG(4) << "Finish prepared context"; if (!GetFetch(fetchs, output_data)) { - LOG(ERROR) << "fail to get fetchs"; + LOG(ERROR) << "fail to get fetches"; return false; } VLOG(3) << "predict cost: " << timer.toc() << "ms"; @@ -178,8 +182,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. std::memcpy(static_cast(input_ptr), - inputs[i].data.data, - inputs[i].data.length); + inputs[i].data.data(), + inputs[i].data.length()); feeds->push_back(input); } return true; @@ -241,10 +245,11 @@ bool NativePaddlePredictor::GetFetch( } outputs->at(i).shape = shape; - outputs->at(i).data.length = sizeof(float) * data.size(); - outputs->at(i).data.data = malloc(outputs->at(i).data.length); - std::memcpy( - outputs->at(i).data.data, data.data(), outputs->at(i).data.length); + auto &buffer = outputs->at(i).data; + if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) { + buffer.Resize(sizeof(float) * data.size()); + } + std::memcpy(buffer.data(), data.data(), buffer.length()); outputs->at(i).dtype = PaddleDType::FLOAT32; // TODO(panyx0718): support other types? fill tensor name? avoid a copy. } diff --git a/paddle/contrib/inference/paddle_inference_api_impl.h b/paddle/contrib/inference/paddle_inference_api_impl.h index 86d1db7bcc..f9ec6f5544 100644 --- a/paddle/contrib/inference/paddle_inference_api_impl.h +++ b/paddle/contrib/inference/paddle_inference_api_impl.h @@ -22,9 +22,9 @@ #include "paddle/contrib/inference/paddle_inference_api.h" #include "paddle/fluid/framework/ddim.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/io.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { @@ -44,7 +44,7 @@ class NativePaddlePredictor : public PaddlePredictor { ~NativePaddlePredictor() override; - private: + protected: bool SetFeed(const std::vector &input_datas, std::vector *feeds); bool GetFetch(const std::vector &fetchs, diff --git a/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc b/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc new file mode 100644 index 0000000000..a11396cee9 --- /dev/null +++ b/paddle/contrib/inference/paddle_inference_api_tensorrt_subgraph_engine.cc @@ -0,0 +1,126 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/contrib/inference/paddle_inference_api.h" +#include "paddle/contrib/inference/paddle_inference_api_impl.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/utils/singleton.h" + +namespace paddle { + +using inference::analysis::Argument; +using inference::Singleton; +using inference::analysis::Analyzer; +using framework::proto::ProgramDesc; + +class TensorRTSubgraphPredictor : public NativePaddlePredictor { + public: + explicit TensorRTSubgraphPredictor(const TensorRTConfig& config) + : NativePaddlePredictor(config), config_(config) {} + + bool Init(const std::shared_ptr& parent_scope) { + VLOG(3) << "Predictor::init()"; + + if (config_.use_gpu) { + place_ = paddle::platform::CUDAPlace(config_.device); + } else { + place_ = paddle::platform::CPUPlace(); + } + if (parent_scope) { + scope_ = parent_scope; + sub_scope_ = &(parent_scope->NewScope()); + } else { + paddle::framework::InitDevices(false); + scope_.reset(new paddle::framework::Scope()); + } + + executor_.reset(new paddle::framework::Executor(place_)); + + // Initialize the inference program + if (!config_.model_dir.empty()) { + // Parameters are saved in separate files sited in + // the specified `dirname`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.model_dir); + } else if (!config_.prog_file.empty() && !config_.param_file.empty()) { + // All parameters are saved in a single file. + // The file names should be consistent with that used + // in Python API `fluid.io.save_inference_model`. + inference_program_ = paddle::inference::Load( + executor_.get(), scope_.get(), config_.prog_file, config_.param_file); + } else { + LOG(ERROR) << "fail to load inference model."; + return false; + } + + // Analyze inference_program + Argument argument; + argument.origin_program_desc.reset( + new ProgramDesc(*inference_program_->Proto())); + Singleton::Global().Run(&argument); + CHECK(argument.transformed_program_desc); + VLOG(5) << "transformed program:\n" + << argument.transformed_program_desc->SerializeAsString(); + VLOG(5) << "to prepare executor"; + *inference_program_->Proto() = *argument.transformed_program_desc; + ctx_ = executor_->Prepare(*inference_program_, 0); + + VLOG(5) << "to create variables"; + executor_->CreateVariables( + *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); + + // Get the feed_target_names and fetch_target_names + feed_target_names_ = inference_program_->GetFeedTargetNames(); + fetch_target_names_ = inference_program_->GetFetchTargetNames(); + return true; + } + + private: + TensorRTConfig config_; +}; + +template <> +std::unique_ptr +CreatePaddlePredictor( + const TensorRTConfig& config) { + VLOG(3) << "create TensorRTSubgraphPredictor"; + if (config.use_gpu) { + // 1. GPU memeroy + PADDLE_ENFORCE_GT( + config.fraction_of_gpu_memory, + 0.f, + "fraction_of_gpu_memory in the config should be set to range (0., 1.]"); + PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device); + std::vector flags; + if (config.fraction_of_gpu_memory >= 0.0f || + config.fraction_of_gpu_memory <= 0.95f) { + flags.push_back("dummpy"); + std::string flag = "--fraction_of_gpu_memory_to_use=" + + std::to_string(config.fraction_of_gpu_memory); + flags.push_back(flag); + VLOG(3) << "set flag: " << flag; + framework::InitGflags(flags); + } + } + + std::unique_ptr predictor( + new TensorRTSubgraphPredictor(config)); + if (!dynamic_cast(predictor.get()) + ->Init(nullptr)) { + return nullptr; + } + return std::move(predictor); +} + +} // namespace paddle diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc index 5d843010e0..c3649dcb96 100644 --- a/paddle/contrib/inference/test_paddle_inference_api_impl.cc +++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc @@ -27,13 +27,12 @@ namespace paddle { PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) { PaddleTensor pt; - pt.data.data = t->data(); if (t->type() == typeid(int64_t)) { - pt.data.length = t->numel() * sizeof(int64_t); + pt.data.Reset(t->data(), t->numel() * sizeof(int64_t)); pt.dtype = PaddleDType::INT64; } else if (t->type() == typeid(float)) { - pt.data.length = t->numel() * sizeof(float); + pt.data.Reset(t->data(), t->numel() * sizeof(float)); pt.dtype = PaddleDType::FLOAT32; } else { LOG(FATAL) << "unsupported type."; @@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) { std::vector outputs; ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); ASSERT_EQ(outputs.size(), 1UL); - size_t len = outputs[0].data.length; - float* data = static_cast(outputs[0].data.data); + size_t len = outputs[0].data.length(); + float* data = static_cast(outputs[0].data.data()); for (size_t j = 0; j < len / sizeof(float); ++j) { ASSERT_LT(data[j], 1.0); ASSERT_GT(data[j], -1.0); @@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) { EXPECT_LT(lod_data[i] - data[i], 1e-3); EXPECT_GT(lod_data[i] - data[i], -1e-3); } - - free(outputs[0].data.data); } void MainImageClassification(bool use_gpu) { @@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) { std::vector outputs; ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs)); ASSERT_EQ(outputs.size(), 1UL); - size_t len = outputs[0].data.length; - float* data = static_cast(outputs[0].data.data); + size_t len = outputs[0].data.length(); + float* data = static_cast(outputs[0].data.data()); float* lod_data = output1.data(); for (size_t j = 0; j < len / sizeof(float); ++j) { EXPECT_NEAR(lod_data[j], data[j], 1e-3); } - free(data); } void MainThreadsWord2Vec(bool use_gpu) { @@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) { // check outputs range ASSERT_EQ(local_outputs.size(), 1UL); - const size_t len = local_outputs[0].data.length; - float* data = static_cast(local_outputs[0].data.data); + const size_t len = local_outputs[0].data.length(); + float* data = static_cast(local_outputs[0].data.data()); for (size_t j = 0; j < len / sizeof(float); ++j) { ASSERT_LT(data[j], 1.0); ASSERT_GT(data[j], -1.0); @@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) { for (int i = 0; i < refs[tid].numel(); ++i) { EXPECT_NEAR(ref_data[i], data[i], 1e-3); } - free(data); }); } for (int i = 0; i < num_jobs; ++i) { @@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) { // check outputs correctness ASSERT_EQ(local_outputs.size(), 1UL); - const size_t len = local_outputs[0].data.length; - float* data = static_cast(local_outputs[0].data.data); + const size_t len = local_outputs[0].data.length(); + float* data = static_cast(local_outputs[0].data.data()); float* ref_data = refs[tid].data(); - EXPECT_EQ(refs[tid].numel(), len / sizeof(float)); + EXPECT_EQ((size_t)refs[tid].numel(), len / sizeof(float)); for (int i = 0; i < refs[tid].numel(); ++i) { EXPECT_NEAR(ref_data[i], data[i], 1e-3); } - free(data); }); } for (int i = 0; i < num_jobs; ++i) { diff --git a/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc b/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc new file mode 100644 index 0000000000..b100630dbe --- /dev/null +++ b/paddle/contrib/inference/test_paddle_inference_api_tensorrt_subgraph_engine.cc @@ -0,0 +1,64 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "paddle/contrib/inference/paddle_inference_api.h" + +namespace paddle { + +DEFINE_string(dirname, "", "Directory of the inference model."); + +void Main(bool use_gpu) { + //# 1. Create PaddlePredictor with a config. + TensorRTConfig config; + config.model_dir = FLAGS_dirname + "word2vec.inference.model"; + config.use_gpu = use_gpu; + config.fraction_of_gpu_memory = 0.15; + config.device = 0; + auto predictor = + CreatePaddlePredictor(config); + + for (int batch_id = 0; batch_id < 3; batch_id++) { + //# 2. Prepare input. + int64_t data[4] = {1, 2, 3, 4}; + + PaddleTensor tensor{.name = "", + .shape = std::vector({4, 1}), + .data = PaddleBuf(data, sizeof(data)), + .dtype = PaddleDType::INT64}; + + // For simplicity, we set all the slots with the same data. + std::vector slots(4, tensor); + + //# 3. Run + std::vector outputs; + CHECK(predictor->Run(slots, &outputs)); + + //# 4. Get output. + ASSERT_EQ(outputs.size(), 1UL); + LOG(INFO) << "output buffer size: " << outputs.front().data.length(); + const size_t num_elements = outputs.front().data.length() / sizeof(float); + // The outputs' buffers are in CPU memory. + for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + LOG(INFO) << static_cast(outputs.front().data.data())[i]; + } + } +} + +TEST(paddle_inference_api_tensorrt_subgraph_engine, main) { Main(true); } + +} // namespace paddle \ No newline at end of file diff --git a/paddle/contrib/tape/README.md b/paddle/contrib/tape/README.md deleted file mode 100644 index 16c22a45d5..0000000000 --- a/paddle/contrib/tape/README.md +++ /dev/null @@ -1,252 +0,0 @@ -# Dynamic Graph on Fluid - -PaddlePaddle Fluid is targeting the autodiff without tape, which, however, is very -challenging and we are still way from there. DyNet and PyTorch provide a good design -idea, the *tape*, that significantly eases the challenge. Also, DyNet provides -a C++ API that is as convenient as Python but with higher efficiency and could -conveniently integrate with industrial/production systems. This package, `tape`, -combines the good of - -1. tape from PyTorch and DyNet -2. C++ API and core from DyNet -3. rich set of operators from PaddlePaddle - -## Overview - -We can implement Dynet-like Tape(See this [survey](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/survey/dynamic_graph.md)) -by wrapping Paddle Fluid's `Operator` and `Variable`. - -The user API is straight forward since - -1. it is imperative. And it uses host language's control flow logic. -1. it avoids extra concepts such as `Scope` and `Executor`. - -All of these benefits come at the cost of just adding one line `reset_global_tape` -at every iteration. - -## Code Structure - -In short, the `Tape` contains a vector of `OpHandle`s. And an `OpHandle` contains its -`type`, the pointers to the `Variable`s, and necessary attributes. - -```c++ -class Variable { -public: - VriableHandle Grad(); // returns its gradient variable -private: - framework::VarDesc desc_; // compile time infershape, necessary for lazy execution - framework::Variable var_; // run time variable, holds data memory -}; - -using VariableHandle = shared_ptr; - -struct OpHandle { - string type_; - map> inputs_; - map> outputs_; - AttributeMap attrs_; -}; - -class Tape { -public: - void AddOp(OpHandle); // add op - void Forward(); // execute the tape_ - void Backward(); // execute the backward of the tape_ -private: - vector tape_; -}; -``` - -We uses `Function` to indicate layers. It takes care of parameter -initialization and `AddOp` to the Tape when it is called. - -```c++ -class Linear { - public: - Linear(int in_dim, int out_dim, const std::string &act) - : w_(new Variable("LinearWeight")), - b_(new Variable("LinearBias")), - act_(act) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{in_dim, out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); - - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); - - init_tape.Forward(); - } - - VariableHandle operator()(VariableHandle input) { - VariableHandle pre_bias(new Variable("linear")); - get_global_tape().AddOp("mul", - {{"X", {input}}, {"Y", {w_}}}, - {{"Out", {pre_bias}}}, - {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); - VariableHandle pre_act(new Variable("linear")); - get_global_tape().AddOp("elementwise_add", - {{"X", {pre_bias}}, {"Y", {b_}}}, - {{"Out", {pre_act}}}, - {{"axis", 1}}); - VariableHandle post_act(new Variable("linear")); - get_global_tape().AddOp(act_, - {{"X", {pre_act}}}, - {{"Out", {post_act}}}, - {}); - return post_act; - } - - std::vector Params() { return {w_, b_}; } - - private: - VariableHandle w_; - VariableHandle b_; - std::string act_; -}; -``` - -## User API - -```c++ -// Model function -paddle::tape::Linear linear1(3, 3, "relu"); // init weight and bias -paddle::tape::Linear linear2(3, 3, "relu"); // init weight and bias -paddle::tape::Mean mean; - -// Optimizer -paddle::tape::SGD sgd(0.001); - -// Data Feeder -paddle::tape::Fill data_feeder(...); -VariableHandle input(new paddle::tape::Variable("input")); -VariableHandle label(new paddle::tape::Variable("label")); - -for (int i = 0; i < 2; ++i) { - reset_global_tape(); - - data_feeder(input, label); - - auto loss = softmax(linear2(linear1(input)), label); // compile time InferShape & InferVarType - LOG(INFO) << loss.value(); // Run forward up to loss - - // Run backward, store gradient of w at w->Grad() - get_global_tape.Backward(loss); - - // Update w - sgd(linear1.Params()); - sgd(linear2.Params()); -} -``` - -
- -digraph G { - - subgraph cluster_0 { - node [shape=record,style=filled]; - style=filled; - color=lightgrey; - linear1 [label="{type: mul | {input | {X: before_mul1 | Y: weight1}} | {output | Out: before_bias1}}"]; - elementwise_add1 [label="{type: elementwise_add | {input | {X: before_bias1 | Y: bias1}} | {output | Out: before_act1}}"]; - relu1 [label="{type: relu | {input | {X: before_act1 }} | {output | Out: after_act1}}"]; - - linear1 -> elementwise_add1->relu1; - label = "forward tape"; - } - - linear1:before_mul1->before_mul1 - linear1:weight1->weight1 - linear1:before_bias1->before_bias1 - - elementwise_add1:bias1->bias1 - elementwise_add1:before_bias1->before_bias1 - elementwise_add1:before_act1->before_act1 - - relu1:before_act1->before_act1 - relu1:after_act1->after_act1 - - subgraph cluster_1 { - node [shape=record,style=filled]; - style=filled; - color=lightgrey; - linear1_grad [label="{type: mul_grad | {input | {X: before_mul1 | Y: weight1| Out_grad: before_bias1_grad}} | {output |{X_grad: before_mul1_grad | Y_grad: weight1_grad}}}"]; - - elementwise_add1_grad [label="{type: elementwise_add_grad | {input | Out_grad: before_act1_grad} | {output |{X_grad: before_bias1_grad | Y_grad: bias1_grad}}}"]; - - relu1_grad [label="{type: relu_grad | {input | Out_grad: after_act1_grad} | {ouput | {X_grad: before_act1_grad }}}"]; - - linear1_grad -> elementwise_add1_grad ->relu1_grad [dir=back]; - label = "backward tape"; - } - - relu1_grad:after_act1_grad->after_act1_grad - relu1_grad:before_act1_grad->before_act1_grad - - elementwise_add1_grad:before_act1_grad->before_act1_grad - elementwise_add1_grad:before_bias1_grad->before_bias1_grad - elementwise_add1_grad:bias1_grad->bias1_grad - - linear1_grad:before_mul1->before_mul1 - linear1_grad:weight1->weight1 - linear1_grad:before_bias1_grad->before_bias1_grad - linear1_grad:before_mul1_grad->before_mul1_grad - linear1_grad:weight1_grad->weight1_grad - - - subgraph cluster_2 { - node [shape=record]; - label = "Linear1"; - weight1 - bias1 - } - - weight1 -> weight1_grad [ label="Grad()", style="dashed" ]; - bias1 -> bias1_grad [ label="Grad()", style="dashed"]; - - - -} -
- -![Image](https://github.com/tonyyang-svail/Paddle/blob/cpp_tap/paddle/contrib/tape/computation_graph.png) - -## Code Reuse - -We want to stay close to Paddle Fluid as much as possible. - -### Reuse All Operators - -As all Ops are registered at `OpInfoMap`, the effort of adding a new `Function` -is about 10 lines of code, similar to expose an operator to Python. - -### Reuse Compile Time InferShape and InferVarType - -Note that all the symbolic information is stored at `tape::Varaible::desc_`, instead -of `ProgramDesc.block.vars`, we create a temporary `BlockDesc` to do `InferShape` and -`InferVarType` every time we `AddOp` to the tape. - -### Reuse Operator::Run - -We use smart pointer, instead of `Scope`, to manage memory. So we create a temporary -`Scope` for every `Operator::Run()`. - -## Possible Feature - -### Release Memory on Backward - -We can release memory aggressively. During backward, we can delete the OpHandle once -we have finished its backward. Since all the variable is managed by smart pointer, the -memory is automatically released when its `ref_count` goes to 0. - -### Kernel Fusion - -As a symbolic representation of the Tape is constructed first before the actual -execution, it would be possible to perform graph optimization. One use case is kernel -fusion. diff --git a/paddle/contrib/tape/computation_graph.png b/paddle/contrib/tape/computation_graph.png deleted file mode 100644 index 6cf5ead735..0000000000 Binary files a/paddle/contrib/tape/computation_graph.png and /dev/null differ diff --git a/paddle/contrib/tape/function.h b/paddle/contrib/tape/function.h deleted file mode 100644 index 8c9694d9a2..0000000000 --- a/paddle/contrib/tape/function.h +++ /dev/null @@ -1,131 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#include "paddle/contrib/tape/tape.h" -#include "paddle/contrib/tape/variable.h" -#include "paddle/fluid/framework/type_defs.h" - -namespace paddle { -namespace tape { - -class Function {}; - -class Fill { - public: - Fill(const std::string &initializer, const framework::AttributeMap &attrs) - : initializer_(initializer), attrs_(attrs) {} - - void operator()(VariableHandle var) { - get_global_tape().AddOp(initializer_, {}, {{"Out", {var}}}, attrs_); - } - - private: - const std::string initializer_; - const framework::AttributeMap attrs_; -}; - -class Mean { - public: - VariableHandle operator()(VariableHandle var) { - VariableHandle out(new Variable("mean")); - get_global_tape().AddOp("mean", {{"X", {var}}}, {{"Out", {out}}}, {}); - return out; - } -}; - -class Linear { - public: - Linear(int in_dim, int out_dim, const std::string &act) - : w_(new Variable("LinearWeight")), - b_(new Variable("LinearBias")), - act_(act) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{in_dim, out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {w_}}}, attrs); - - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{out_dim}; - attrs["value"] = 1.0f; - init_tape.AddOp(initializer, {}, {{"Out", {b_}}}, attrs); - - init_tape.Forward(); - } - - VariableHandle operator()(VariableHandle input) { - VariableHandle pre_bias(new Variable("linear")); - get_global_tape().AddOp("mul", - {{"X", {input}}, {"Y", {w_}}}, - {{"Out", {pre_bias}}}, - {{"x_num_col_dims", 1}, {"y_num_col_dims", 1}}); - VariableHandle pre_act(new Variable("linear")); - get_global_tape().AddOp("elementwise_add", - {{"X", {pre_bias}}, {"Y", {b_}}}, - {{"Out", {pre_act}}}, - {{"axis", 1}}); - VariableHandle post_act(new Variable("linear")); - get_global_tape().AddOp( - act_, {{"X", {pre_act}}}, {{"Out", {post_act}}}, {}); - return post_act; - } - - std::vector Params() { return {w_, b_}; } - - private: - VariableHandle w_; - VariableHandle b_; - std::string act_; -}; - -class SGD { - public: - SGD(float learning_rate) : learning_rate_(new Variable("sgd")) { - Tape init_tape; - - std::string initializer = "fill_constant"; - framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{1}; - attrs["value"] = learning_rate; - init_tape.AddOp(initializer, {}, {{"Out", {learning_rate_}}}, attrs); - - init_tape.Forward(); - } - - void operator()(VariableHandle input) { - PADDLE_ENFORCE(get_global_tape().HasBeenBackwarded(), - "optimization must happen after the backward"); - Tape temp_tape; - temp_tape.AddOp("sgd", - {{"Param", {input}}, - {"LearningRate", {learning_rate_}}, - {"Grad", {input->Grad()}}}, - {{"ParamOut", {input}}}, - {}); - temp_tape.Forward(); - } - - private: - VariableHandle learning_rate_; -}; -} -} diff --git a/paddle/contrib/tape/tape.cc b/paddle/contrib/tape/tape.cc deleted file mode 100644 index 531499b6fe..0000000000 --- a/paddle/contrib/tape/tape.cc +++ /dev/null @@ -1,265 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/contrib/tape/tape.h" - -#include -#include -#include -#include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/dim.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/platform/place.h" -#include "paddle/fluid/pybind/pybind.h" - -namespace paddle { -namespace tape { - -// borrowed from -// https://stackoverflow.com/questions/874134/find-if-string-ends-with-another-string-in-c -inline bool ends_with(std::string const &value, std::string const &ending) { - if (ending.size() > value.size()) return false; - return std::equal(ending.rbegin(), ending.rend(), value.rbegin()); -} - -std::ostream &operator<<(std::ostream &os, const framework::VarDesc &var_desc) { - os << var_desc.Name(); - os << "[" << var_desc.GetType() << "]"; - os << "[" << var_desc.GetDataType() << "]"; - os << "{"; - for (auto &i : var_desc.GetShape()) { - os << i << ","; - } - os << "}"; - return os; -} - -std::string to_string(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) { - std::stringstream ss; - ss << type << " "; - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - ss << param_name.first << ":(" << var->Desc() << ") "; - } - } - for (auto ¶m_name : out_vars) { - for (auto &var : param_name.second) { - ss << param_name.first << ":(" << var->Desc() << ") "; - } - } - return ss.str(); -} - -framework::OpDesc CreateOpDesc(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) { - framework::VariableNameMap inputs; - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - inputs[param_name.first].emplace_back(var->Name()); - } - } - framework::VariableNameMap outputs; - for (auto ¶m_name : out_vars) { - for (auto &var : param_name.second) { - outputs[param_name.first].emplace_back(var->Name()); - } - } - return framework::OpDesc(type, inputs, outputs, attrs); -} - -void InferShapeAndVarType(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap *out_vars, - const framework::AttributeMap &attrs) { - framework::OpDesc op_desc = CreateOpDesc(type, in_vars, *out_vars, attrs); - - // Create a temporary block for compile-time - framework::ProgramDesc program_desc; - framework::BlockDesc *block_desc = program_desc.MutableBlock(0); - PADDLE_ENFORCE(block_desc); - - for (auto ¶m_name : in_vars) { - for (auto &var : param_name.second) { - *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); - } - } - for (auto ¶m_name : *out_vars) { - for (auto &var : param_name.second) { - *block_desc->Var(var->Name())->Proto() = *var->MutableDesc()->Proto(); - } - } - - LOG(INFO) << "- " << to_string(type, in_vars, *out_vars, attrs); - op_desc.InferShape(*block_desc); - op_desc.InferVarType(block_desc); - for (auto ¶m_name : *out_vars) { - for (auto &var : param_name.second) { - *var->MutableDesc()->Proto() = *block_desc->Var(var->Name())->Proto(); - } - } - LOG(INFO) << "+ " << to_string(type, in_vars, *out_vars, attrs); -} - -void Tape::AddOp(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap out_vars, - const framework::AttributeMap &attrs) { - InferShapeAndVarType(type, in_vars, &out_vars, attrs); - tape_.emplace_back(type, in_vars, out_vars, attrs); -} - -// Temporary Scope for Operator::Run() -class ScopeWrapper : public framework::Scope { - public: - ScopeWrapper(const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars) { - for (auto &v : in_vars) { - for (auto &vv : v.second) { - if (!vars_.count(vv->Name())) { - vars_[vv->Name()].reset(vv->Var()); - } - } - } - for (auto &v : out_vars) { - for (auto &vv : v.second) { - if (!vars_.count(vv->Name())) { - vars_[vv->Name()].reset(vv->Var()); - } - } - } - } - - ~ScopeWrapper() { - for (auto &pair : vars_) { - pair.second.release(); - } - } -}; - -void Tape::Forward() { - LOG(INFO) << "Starting forward -------------------------"; - PADDLE_ENFORCE(!has_been_backwarded_); - while (current_position_ < tape_.size()) { - OpHandle &op = tape_[current_position_]; - - // Create Output Tensor, this is only necessary for OpWithKernel - for (auto ¶m2var : op.outputs_) { - for (auto &var : param2var.second) { - var->InitializeVariable(); - } - } - - framework::OpDesc op_desc = - CreateOpDesc(op.type_, op.inputs_, op.outputs_, op.attrs_); - ScopeWrapper scope(op.inputs_, op.outputs_); - framework::OpRegistry::CreateOp(op_desc)->Run(scope, platform::CPUPlace()); - current_position_++; - } - - LOG(INFO) << "Finishing forward -------------------------"; -} - -void Tape::Backward(VariableHandle target) { - PADDLE_ENFORCE(!has_been_backwarded_); - - Forward(); - - // TODO(tonyyang-svail): check output of last op is target - backward_tape_.reset(new Tape()); - - framework::AttributeMap attrs; - - // FIXME(tonyyang-svail): Need to infer_data_type - attrs["dtype"] = framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{1}; - attrs["value"] = 1.0f; - backward_tape_->AddOp( - "fill_constant", {}, {{"Out", {target->Grad()}}}, attrs); - - for (auto it = tape_.rbegin(); it != tape_.rend(); ++it) { - framework::OpDesc op_desc = - CreateOpDesc(it->type_, it->inputs_, it->outputs_, it->attrs_); - std::unordered_map grad_to_var; - std::vector> grad_op_descs = - framework::OpInfoMap::Instance() - .Get(op_desc.Type()) - .GradOpMaker()(op_desc, {}, &grad_to_var, {}); - - for (auto &op_desc : grad_op_descs) { - std::unordered_map name2var; - for (auto ¶m2vars : it->inputs_) { - for (auto &a : param2vars.second) { - name2var[a->Name()] = a; - } - } - for (auto ¶m2vars : it->outputs_) { - for (auto &a : param2vars.second) { - name2var[a->Name()] = a; - } - } - - VariableHandleMap in_vars; - VariableHandleMap out_vars; - std::map - loop_over{{&op_desc->Inputs(), &in_vars}, - {&op_desc->Outputs(), &out_vars}}; - for (auto &each : loop_over) { - auto &vmp = *each.first; - auto &vhm = *each.second; - for (auto &p2a : vmp) { - for (auto &argu : p2a.second) { - if (name2var.count(argu)) { - vhm[p2a.first].push_back(name2var[argu]); - } else { - PADDLE_ENFORCE(ends_with(argu, framework::kGradVarSuffix), - argu.c_str()); - std::string name = argu.substr( - 0, argu.size() - std::strlen(framework::kGradVarSuffix)); - PADDLE_ENFORCE(name2var.count(name), name.c_str()); - vhm[p2a.first].push_back(name2var[name]->Grad()); - } - } - } - } - - backward_tape_->AddOp( - op_desc->Type(), in_vars, out_vars, op_desc->GetAttrMap()); - } - - // TODO(tonyyang-svail): how to fill empty grad? - // TODO(tonyyang-svail): Sum var grad is necessary - } - - backward_tape_->Forward(); - has_been_backwarded_ = true; -} - -Tape &get_global_tape() { - static Tape T; - return T; -} - -void reset_global_tape() { get_global_tape() = Tape(); } -} -} diff --git a/paddle/contrib/tape/tape.h b/paddle/contrib/tape/tape.h deleted file mode 100644 index ed79de17a7..0000000000 --- a/paddle/contrib/tape/tape.h +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include -#include -#include -#include - -#include "paddle/contrib/tape/variable.h" - -namespace paddle { -namespace tape { - -using VariableHandleMap = std::map>; - -struct OpHandle { - OpHandle(const std::string &type, - const VariableHandleMap &in_vars, - const VariableHandleMap &out_vars, - const framework::AttributeMap &attrs) - : type_(type), inputs_(in_vars), outputs_(out_vars), attrs_(attrs) {} - - std::string type_; - VariableHandleMap inputs_; - VariableHandleMap outputs_; - framework::AttributeMap attrs_; -}; - -class Tape { - public: - void AddOp(const std::string &type, - const VariableHandleMap &in_vars, - VariableHandleMap out_vars, - const framework::AttributeMap &attrs); - void Forward(); - void Backward(VariableHandle target); - - bool HasBeenBackwarded() { return has_been_backwarded_; } - - private: - bool has_been_backwarded_ = false; - size_t current_position_ = 0; - - std::vector tape_; - std::shared_ptr backward_tape_; -}; - -Tape &get_global_tape(); - -void reset_global_tape(); -} -} diff --git a/paddle/contrib/tape/test_tape.cc b/paddle/contrib/tape/test_tape.cc deleted file mode 100644 index e9bfd21a71..0000000000 --- a/paddle/contrib/tape/test_tape.cc +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "gtest/gtest.h" -#include "paddle/contrib/tape/function.h" - -using namespace paddle::tape; - -TEST(Tape, TestMLP) { - LOG(INFO) << "TestMLP"; - Linear linear1(3, 3, "relu"); - Linear linear2(3, 3, "relu"); - Mean mean; - - SGD sgd(0.001); - - std::string initializer = "fill_constant"; - paddle::framework::AttributeMap attrs; - attrs["dtype"] = paddle::framework::proto::VarType::Type::VarType_Type_FP32; - attrs["shape"] = std::vector{3, 3}; - attrs["value"] = 1.0f; - Fill filler(initializer, attrs); - - for (int i = 0; i < 2; ++i) { - reset_global_tape(); - - VariableHandle input(new Variable("input")); - filler(input); - - auto loss = mean(linear2(linear1(input))); - - get_global_tape().Backward(loss); - - for (auto w : linear1.Params()) { - sgd(w); - } - for (auto w : linear2.Params()) { - sgd(w); - } - } -} - -int main(int argc, char** argv) { - std::vector places; - places.emplace_back(paddle::platform::CPUPlace()); - paddle::platform::DeviceContextPool::Init(places); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/contrib/tape/variable.h b/paddle/contrib/tape/variable.h deleted file mode 100644 index 35c328e69c..0000000000 --- a/paddle/contrib/tape/variable.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#pragma once - -#include - -#include "paddle/fluid/framework/operator.h" // framework::kGradVarSuffix -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/framework/variable.h" - -namespace paddle { -namespace tape { - -class Variable; -using VariableHandle = std::shared_ptr; - -/* - * Combination of - * framework::VarDesc desc_; - * framework::Variable var_; - */ -class Variable { - public: - Variable(const std::string pre_fix) - : desc_(pre_fix + std::to_string(count())) {} - - Variable(const std::string pre_fix, bool is_grad) - : desc_(pre_fix + (is_grad ? framework::kGradVarSuffix - : std::to_string(count()))) {} - - ~Variable() { LOG(INFO) << "Deleting " << Name(); } - - // Instantiate LoDTensor/SelectedRow - void InitializeVariable(); - - VariableHandle Grad() { - if (grad_.expired()) { - VariableHandle new_grad(new Variable(desc_.Name(), true)); - grad_ = new_grad; - return new_grad; - } else { - return VariableHandle(grad_); - } - } - - // Stochastic Gradient Descent with Momentum - // VariableHandle Momentum (); - - // void init(const std::string& initializer, - // const framework::AttributeMap& attrs); - - // void value() {}; - - const framework::VarDesc& Desc() const { return desc_; } - framework::VarDesc* MutableDesc() { return &desc_; } - - // TODO(tonyyang-svail): No need to expose name - std::string Name() const { return desc_.Name(); } - - framework::Variable* Var() { return &var_; } - - private: - int count() { - static int counter = 0; - return counter++; - } - - framework::VarDesc desc_; - framework::Variable var_; - - std::weak_ptr grad_; -}; -} -} diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 6286dda4a5..ec252929d5 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -21,12 +21,13 @@ endif() cc_test(eigen_test SRCS eigen_test.cc DEPS tensor) -nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context init) +nv_test(mixed_vector_test SRCS mixed_vector_test.cu DEPS place memory device_context tensor) cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto recordio) cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor memory) -nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init) +nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor) cc_library(reader SRCS reader.cc DEPS lod_tensor ddim) +cc_test(reader_test SRCS reader_test.cc DEPS reader) cc_test(variable_test SRCS variable_test.cc) @@ -38,7 +39,7 @@ cc_test(scope_test SRCS scope_test.cc DEPS scope) cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor) nv_test(data_device_transform_test SRCS data_device_transform_test.cu - DEPS operator op_registry init math_function) + DEPS operator op_registry device_context math_function) if(WITH_GPU) nv_library(data_type_transform SRCS data_type_transform.cu DEPS tensor) @@ -63,7 +64,7 @@ cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute device_context) cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference data_transform lod_tensor profiler) -cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init) +cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) @@ -101,14 +102,14 @@ cc_test(var_type_inference_test SRCS var_type_inference_test.cc DEPS op_registry cc_library(selected_rows SRCS selected_rows.cc DEPS tensor) cc_test(selected_rows_test SRCS selected_rows_test.cc DEPS selected_rows) -cc_library(init SRCS init.cc DEPS gflags device_context place stringpiece operator) -cc_test(init_test SRCS init_test.cc DEPS init) - cc_test(op_kernel_type_test SRCS op_kernel_type_test.cc DEPS place device_context framework_proto) cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc) # cc_test(channel_test SRCS channel_test.cc) cc_test(tuple_test SRCS tuple_test.cc ) -cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op - channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op - conditional_block_op while_op assign_op print_op executor proto_desc) + +# disable test temporarily. +# TODO https://github.com/PaddlePaddle/Paddle/issues/11971 +# cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op +# channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op +# conditional_block_op while_op assign_op print_op executor proto_desc) diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index a91fe5c99d..f2c55e533a 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ b/paddle/fluid/framework/data_device_transform_test.cu @@ -14,13 +14,13 @@ limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise_op_function.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 5b8dfc57ba..cd00b7de73 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -147,10 +147,9 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, "Input tensor type is not supported: ", in.type().name()); memory::data_type out_type = in_type; - memory::format in_format = - in_tz.size() == 2 ? memory::format::nc : in.format(); - memory::format out_format = - out_tz.size() == 2 ? memory::format::nc : ToMKLDNNFormat(out_layout); + auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format()); + auto out_format = + platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout)); void* in_data = GetDataFromTensor(in, in_type); diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 2ba84ce57f..90bb206ec6 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -61,6 +61,7 @@ inline MKLDNNDataType ToMKLDNNDataType(const std::type_index type) { if (iter != dict.end()) return iter->second; return MKLDNNDataType::data_undef; } + #endif void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, diff --git a/paddle/fluid/framework/data_transform.cc b/paddle/fluid/framework/data_transform.cc index b8fcc92697..8287222450 100644 --- a/paddle/fluid/framework/data_transform.cc +++ b/paddle/fluid/framework/data_transform.cc @@ -18,17 +18,21 @@ limitations under the License. */ #include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_type_transform.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace framework { -static void PassTensorData(Tensor* from, Tensor* to) { +static void PassTensorData(Tensor *from, Tensor *to) { to->ShareDataWith(*from); *from = Tensor(); } -void DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor, Tensor* output_tensor) { +void TransformData(const OpKernelType &expected_kernel_type, + const OpKernelType &kernel_type_for_var, + const Tensor &input_tensor, Tensor *output_tensor) { bool transformed = false; Tensor in; in.ShareDataWith(input_tensor); @@ -47,9 +51,13 @@ void DataTransform(const OpKernelType& expected_kernel_type, #ifdef PADDLE_WITH_MKLDNN // Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel // Just set layout/format. No real transform occur + + auto out_format = platform::MKLDNNFormatForSize(in.dims().size(), + ToMKLDNNFormat(lin)); + out.ShareDataWith(input_tensor); out.set_layout(DataLayout::kMKLDNN); - out.set_format(ToMKLDNNFormat(lin)); + out.set_format(out_format); #endif } else { // Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel @@ -85,17 +93,17 @@ void DataTransform(const OpKernelType& expected_kernel_type, output_tensor->ShareDataWith(in); } -void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, - Variable* out_var) { +void SetTensorToVariable(const Variable &in_var, const Tensor &tensor, + Variable *out_var) { if (in_var.IsType()) { - auto& in_lod_tensor = in_var.Get(); - auto* tran_lod_tensor = out_var->GetMutable(); + auto &in_lod_tensor = in_var.Get(); + auto *tran_lod_tensor = out_var->GetMutable(); tran_lod_tensor->set_lod(in_lod_tensor.lod()); tran_lod_tensor->set_layout(in_lod_tensor.layout()); tran_lod_tensor->ShareDataWith(tensor); } else if (in_var.IsType()) { - auto& in_selected_rows = in_var.Get(); - auto* trans_selected_rows = out_var->GetMutable(); + auto &in_selected_rows = in_var.Get(); + auto *trans_selected_rows = out_var->GetMutable(); trans_selected_rows->set_height(in_selected_rows.height()); trans_selected_rows->set_rows(in_selected_rows.rows()); trans_selected_rows->mutable_value()->ShareDataWith(tensor); diff --git a/paddle/fluid/framework/data_transform.h b/paddle/fluid/framework/data_transform.h index dee5d8c7c1..ae3ab051bd 100644 --- a/paddle/fluid/framework/data_transform.h +++ b/paddle/fluid/framework/data_transform.h @@ -30,12 +30,15 @@ limitations under the License. */ namespace paddle { namespace framework { -void DataTransform(const OpKernelType& expected_kernel_type, - const OpKernelType& kernel_type_for_var, - const Tensor& input_tensor, Tensor* out); - -void CopyVariableWithTensor(const Variable& in_var, const Tensor& tensor, - Variable* out_var); +void TransformData(const OpKernelType &expected_kernel_type, + const OpKernelType &kernel_type_for_var, + const Tensor &input_tensor, Tensor *out); + +/** + * Set OutVar from InVar, except the tensor is shared with `tensor` + */ +void SetTensorToVariable(const Variable &in_var, const Tensor &tensor, + Variable *out_var); } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 3c73b6cc55..4fb4ec38ee 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -25,11 +25,12 @@ else() cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) endif() +cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor) cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor) cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope) cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle - scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle) + scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle) cc_library(ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker) diff --git a/paddle/fluid/framework/details/broadcast_op_handle.cc b/paddle/fluid/framework/details/broadcast_op_handle.cc index d5ca061944..1d9f1bd6e4 100644 --- a/paddle/fluid/framework/details/broadcast_op_handle.cc +++ b/paddle/fluid/framework/details/broadcast_op_handle.cc @@ -73,6 +73,9 @@ void BroadcastOpHandle::RunImpl() { int root_id = boost::get(in_tensor.place()).device; std::vector> broadcast_calls; + int type = platform::ToNCCLDataType(in_tensor.type()); + size_t numel = static_cast(in_tensor.numel()); + for (auto out_var_handle : out_var_handles) { Variable *out_var = var_scopes.at(out_var_handle->scope_idx_) ->FindVar(out_var_handle->name_); @@ -87,13 +90,11 @@ void BroadcastOpHandle::RunImpl() { send_recv_buffer = const_cast(in_tensor.data()); out_handle = out_var_handle; } else { - send_recv_buffer = - VariableVisitor::GetMutableTensor(out_var).mutable_data( - out_var_handle->place_); + send_recv_buffer = VariableVisitor::GetMutableTensor(out_var) + .Resize(in_tensor.dims()) + .mutable_data(out_var_handle->place_); } - int type = platform::ToNCCLDataType(in_tensor.type()); - size_t numel = static_cast(in_tensor.numel()); broadcast_calls.emplace_back( [send_recv_buffer, numel, type, root_id, &nccl_ctx] { PADDLE_ENFORCE(platform::dynload::ncclBcast( diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 64e83acb4d..b2e5399e23 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -33,6 +33,8 @@ struct BuildStrategy { GradientScaleStrategy gradient_scale_{GradientScaleStrategy::kCoeffNumDevice}; std::string debug_graphviz_path_{""}; + + bool enable_data_balance_{false}; }; } // namespace details diff --git a/paddle/fluid/framework/details/data_balance_op_handle.cc b/paddle/fluid/framework/details/data_balance_op_handle.cc new file mode 100644 index 0000000000..68896c8ac1 --- /dev/null +++ b/paddle/fluid/framework/details/data_balance_op_handle.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/details/data_balance_op_handle.h" +#include +#include "paddle/fluid/framework/details/container_cast.h" + +namespace paddle { +namespace framework { +namespace details { + +#ifdef PADDLE_WITH_CUDA +DataBalanceOpHandle::DataBalanceOpHandle( + const std::vector &local_scopes, + const std::vector &places, + const platform::NCCLContextMap *ctxs) + : local_scopes_(local_scopes), places_(places) { + if (ctxs) { + for (auto &p : places_) { + this->dev_ctxes_[p] = ctxs->DevCtx(p); + } + } +} +#else +DataBalanceOpHandle::DataBalanceOpHandle( + const std::vector &local_scopes, + const std::vector &places) + : local_scopes_(local_scopes), places_(places) {} +#endif + +std::string DataBalanceOpHandle::Name() const { return "data balance"; } + +std::vector> DataBalanceOpHandle::GetBalancePlan( + const std::vector &device_sizes) { + int device_num = device_sizes.size(); + int total_size = 0; + int empty_num = 0; + std::vector> size_device_vec; + size_device_vec.reserve(device_num); + for (int i = 0; i < device_num; ++i) { + if (device_sizes[i] == 0) { + ++empty_num; + } + total_size += device_sizes[i]; + size_device_vec.push_back({{device_sizes[i], i}}); + } + std::vector> res; + if (empty_num == 0) { + // No need to do data balance. + return res; + } + if (total_size < device_num) { + // No enough data. + PADDLE_THROW_EOF(); + } + std::sort(size_device_vec.begin(), size_device_vec.end(), + [](const std::array &a, const std::array &b) { + return a[0] > b[0]; + }); + int expected_device_size = total_size / device_num; + int src_idx = 0; + for (int dst_idx = device_num - empty_num; dst_idx < device_num; ++dst_idx) { + if (size_device_vec[src_idx][0] <= expected_device_size) { + ++src_idx; + PADDLE_ENFORCE_LT( + src_idx, device_num - empty_num, + "In current srategy an empty tensor should not be copy source."); + } + size_device_vec[src_idx][0] -= expected_device_size; + size_device_vec[dst_idx][0] += expected_device_size; + res.push_back({{size_device_vec[src_idx][1], size_device_vec[dst_idx][1], + expected_device_size}}); + } + return res; +} + +void DataBalanceOpHandle::RunImpl() { + PADDLE_ENFORCE_GT(places_.size(), 1, + "Data balance can only be enabled when the number of " + "places to run larger than 1."); + auto in_var_handles = DynamicCast(inputs_); + auto out_var_handles = DynamicCast(outputs_); + PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0); + PADDLE_ENFORCE_EQ( + in_var_handles.size(), out_var_handles.size(), + "The NoDummyInputSize and NoDummyOutputSize should be equal."); + int data_num = in_var_handles.size() / places_.size(); + WaitInputVarGenerated(); + std::vector> lod_tensors(data_num); + std::vector device_sizes; + for (int i = 0; i < static_cast(in_var_handles.size()); ++i) { + PADDLE_ENFORCE_EQ(in_var_handles[i]->name_, out_var_handles[i]->name_, + "The name of input and output should be equal."); + int place_idx = i / data_num; + int data_idx = i % data_num; + auto *local_scope = + local_scopes_[place_idx]->FindVar(kLocalExecScopeName)->Get(); + auto *tensor_var = local_scope->FindVar(in_var_handles[i]->name_); + PADDLE_ENFORCE(tensor_var->IsType()); + auto *tensor = tensor_var->GetMutable(); + lod_tensors[data_idx].push_back(tensor); + int ins_size = + tensor->lod().empty() ? tensor->dims()[0] : tensor->NumElements(); + if (data_idx == 0) { + device_sizes.emplace_back(ins_size); + } else { + PADDLE_ENFORCE_EQ( + ins_size, device_sizes.at(place_idx), + "All data on the same device shall have the same batch size."); + } + } + const auto &balance_plan = GetBalancePlan(device_sizes); + + for (const auto &trans : balance_plan) { + for (int data_idx = 0; data_idx < data_num; ++data_idx) { + LoDTensor *src_tensor = lod_tensors[data_idx][trans[0]]; + LoDTensor *dst_tensor = lod_tensors[data_idx][trans[1]]; + int trans_ins_size = trans[2]; + LoD src_lod = src_tensor->lod(); + int src_ins_size = + src_lod.empty() ? src_tensor->dims()[0] : src_tensor->NumElements(); + int cut_point = src_ins_size - trans_ins_size; + if (!src_lod.empty()) { + for (auto &level : src_lod) { + cut_point = level[cut_point]; + } + } + TensorCopySync(src_tensor->Slice(cut_point, src_tensor->dims()[0]), + dst_tensor->place(), dst_tensor); + src_tensor->ShareDataWith(src_tensor->Slice(0, cut_point)); + if (!src_lod.empty()) { + dst_tensor->set_lod(SliceInLevel( + src_lod, 0, src_ins_size - trans_ins_size, src_ins_size)); + src_tensor->set_lod( + SliceInLevel(src_lod, 0, 0, src_ins_size - trans_ins_size)); + } + } + } +} + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/data_balance_op_handle.h b/paddle/fluid/framework/details/data_balance_op_handle.h new file mode 100644 index 0000000000..76a407e361 --- /dev/null +++ b/paddle/fluid/framework/details/data_balance_op_handle.h @@ -0,0 +1,59 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/scope.h" +#ifdef PADDLE_WITH_CUDA +#include "paddle/fluid/platform/nccl_helper.h" +#endif + +namespace paddle { +namespace framework { +namespace details { + +struct DataBalanceOpHandle : public OpHandleBase { + public: +#ifdef PADDLE_WITH_CUDA + DataBalanceOpHandle(const std::vector &local_scopes, + const std::vector &places, + const platform::NCCLContextMap *ctxs); +#else + DataBalanceOpHandle(const std::vector &local_scopes, + const std::vector &places); +#endif + + std::string Name() const override; + + bool IsMultiDeviceTransfer() override { return false; }; + + protected: + void RunImpl() override; + + private: + // std::vector<(src_dev_id, dst_dev_id, trans_size)> + std::vector> GetBalancePlan( + const std::vector &batch_size_per_device); + + const std::vector local_scopes_; + const std::vector places_; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 224e8e1f6e..d646c94460 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -67,8 +67,8 @@ void FetchOpHandle::RunImpl() { #endif } else { tensors_[i].ShareDataWith(t); - tensors_[i].set_lod(t.lod()); } + tensors_[i].set_lod(t.lod()); } this->WaitAndMergeCPUTensors(); diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index 78356cb1be..b82c2ef408 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/details/all_reduce_op_handle.h" #include "paddle/fluid/framework/details/broadcast_op_handle.h" #include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/data_balance_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/details/rpc_op_handle.h" @@ -57,6 +58,12 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder( for (auto &p : params) { grad_names_.insert(GradVarName(p)); } + balance_vars_.resize(places_.size(), 0); + if (strategy_.enable_data_balance_ && places_.size() == 1) { + LOG(WARNING) << "It is no need to enable data balance when there is only " + "one place. enable_data_balance is set to False."; + strategy_.enable_data_balance_ = false; + } } void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, @@ -140,11 +147,30 @@ bool MultiDevSSAGraphBuilder::IsDistTrainOp( checker(op.InputArgumentNames(), recv_vars); } +size_t MultiDevSSAGraphBuilder::GetAppropriateDeviceID( + const std::vector &var_names) const { + int64_t numel_sum = 0; + for (auto var_name : var_names) { + auto var_desc = all_vars_.at(var_name); + PADDLE_ENFORCE_NOT_NULL(var_desc); + auto dim = framework::make_ddim(var_desc->GetShape()); + int64_t numel = framework::product(dim); + PADDLE_ENFORCE_GT(numel, 0); + numel_sum += numel; + } + + auto smallest = + std::min_element(std::begin(balance_vars_), std::end(balance_vars_)); + size_t dev_id = + static_cast(std::distance(std::begin(balance_vars_), smallest)); + balance_vars_[dev_id] += numel_sum; + return dev_id; +} + std::unique_ptr MultiDevSSAGraphBuilder::Build( const ProgramDesc &program) const { - std::unordered_map all_vars; for (auto *var : program.Block(0).AllVars()) { - all_vars[var->Name()] = var; + all_vars_.emplace(var->Name(), var); } auto graph = new SSAGraph(); @@ -161,35 +187,16 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( auto send_vars = FindDistTrainSendVars(program); auto recv_vars = FindDistTrainRecvVars(program); - std::vector> var_name_on_devices; std::vector> bcast_var_name_set; - var_name_on_devices.resize(places_.size()); bcast_var_name_set.resize(places_.size()); size_t cur_device_id = 0; - std::vector balance_grads(places_.size(), 0); - - auto get_appropriate_dev = [&](std::string &g_name) -> size_t { - auto var_desc = all_vars.at(g_name); - PADDLE_ENFORCE_NOT_NULL(var_desc); - auto dim = framework::make_ddim(var_desc->GetShape()); - int64_t numel = framework::product(dim); - PADDLE_ENFORCE_GE(numel, 0); - auto smallest = - std::min_element(std::begin(balance_grads), std::end(balance_grads)); - size_t dev_id = - static_cast(std::distance(std::begin(balance_grads), smallest)); - balance_grads[dev_id] += numel; - return dev_id; - }; - bool is_forwarding = true; + for (auto *op : program.Block(0).AllOps()) { if (boost::get( op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) == static_cast(OpRole::kRPC)) { - // append rpc op if program is distributed trainer main program. - // always use the first device CreateRPCOp(&result, *op); } else if (IsDistTrainOp(*op, send_vars, recv_vars)) { CreateDistTrainOp(&result, *op); @@ -199,53 +206,70 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( BuildStrategy::GradientScaleStrategy::kCustomized) { CreateScaleLossGradOp(&result); } + // This assumes the backward generating code will ensure IsScaleLossOp + // is true only for the op that scale the final scalar loss. + // It also assumes backward op will always follow the forward op in + // the block. is_forwarding = false; } else { - int op_dev_id = GetOpDeviceID(var_name_on_devices, *op); - if (op_dev_id == -1) { // var on all device - CreateComputationalOps(&result, *op, places_.size()); - } else { + int op_dev_id = GetOpDeviceID(*op); + if (op_dev_id != -1) { // This op only runs on one specific device. CreateComputationalOp(&result, *op, op_dev_id); for (auto &var_name : op->OutputArgumentNames()) { - var_name_on_devices[op_dev_id].emplace(var_name); + var_name_on_devices_.emplace(var_name, op_dev_id); } - } - if (!is_forwarding && places_.size() > 1) { - // Currently, we assume that once gradient is generated, it can be - // broadcast, and each gradient is only broadcast once. - if (static_cast(boost::get(op->GetAttr( - OpProtoAndCheckerMaker::OpRoleAttrName())) & - static_cast(OpRole::kBackward))) { - try { - auto backward_vars = - boost::get>(op->GetNullableAttr( - OpProtoAndCheckerMaker::OpRoleVarAttrName())); - - PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0); - - for (size_t i = 0; i < backward_vars.size(); i += 2) { - auto &p_name = backward_vars[i]; - auto &g_name = backward_vars[i + 1]; - VLOG(10) << "Bcast " << g_name << " for parameter " << p_name; - - switch (strategy_.reduce_) { - case BuildStrategy::ReduceStrategy::kReduce: - cur_device_id = get_appropriate_dev(g_name); - CreateReduceOp(&result, g_name, cur_device_id); - var_name_on_devices[cur_device_id].emplace(g_name); - bcast_var_name_set[cur_device_id].emplace(p_name); - break; - case BuildStrategy::ReduceStrategy::kAllReduce: - if (IsSparseGradient(all_vars, g_name)) { - CreateReduceOp(&result, g_name, 0); - CreateBroadcastOp(&result, g_name, 0); - } else { - InsertAllReduceOp(&result, g_name); - } - break; + } else { + // This op runs on all devices, and its output may have parameter's + // gradients. + if (op->Type() == "read" && strategy_.enable_data_balance_) { + op->SetAttr("throw_eof_exp", false); + CreateComputationalOps(&result, *op, places_.size()); + const auto &data_var_names = op->Output("Out"); + InsertDataBalanceOp(&result, data_var_names); + } else { + CreateComputationalOps(&result, *op, places_.size()); + } + + if (!is_forwarding && places_.size() > 1) { + // Currently, we assume that once gradient is generated, it can be + // broadcast, and each gradient is only broadcast once. + if (static_cast(boost::get(op->GetAttr( + OpProtoAndCheckerMaker::OpRoleAttrName())) & + static_cast(OpRole::kBackward))) { + try { + auto backward_vars = + boost::get>(op->GetNullableAttr( + OpProtoAndCheckerMaker::OpRoleVarAttrName())); + + PADDLE_ENFORCE_EQ(backward_vars.size() % 2, 0); + + for (size_t i = 0; i < backward_vars.size(); i += 2) { + auto &p_name = backward_vars[i]; + auto &g_name = backward_vars[i + 1]; + VLOG(10) << "Bcast " << g_name << " for parameter " << p_name; + + switch (strategy_.reduce_) { + case BuildStrategy::ReduceStrategy::kReduce: + cur_device_id = GetAppropriateDeviceID({g_name}); + CreateReduceOp(&result, g_name, cur_device_id); + var_name_on_devices_.emplace(g_name, cur_device_id); + bcast_var_name_set[cur_device_id].emplace(p_name); + break; + case BuildStrategy::ReduceStrategy::kAllReduce: + if (IsSparseGradient(g_name)) { + CreateReduceOp(&result, g_name, 0); + CreateBroadcastOp(&result, g_name, 0); + } else { + InsertAllReduceOp(&result, g_name); + } + break; + default: + LOG(FATAL) << "Unknown reduce strategy "; + break; + } } + } catch (boost::bad_get e) { } - } catch (boost::bad_get e) { } } } @@ -261,7 +285,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( } /* Dependency graph has been constructed. However, there are still data - harzaeds need to be handled. + hazards need to be handled. */ PolishGraphToSupportDataHazards(&result); @@ -273,11 +297,9 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( return std::unique_ptr(graph); } -bool MultiDevSSAGraphBuilder::IsSparseGradient( - const std::unordered_map &all_vars, - const std::string &og) const { - PADDLE_ENFORCE(all_vars.count(og) != 0); - if (all_vars.at(og)->GetType() == proto::VarType::SELECTED_ROWS) { +bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const { + PADDLE_ENFORCE(all_vars_.count(og) != 0); + if (all_vars_.at(og)->GetType() == proto::VarType::SELECTED_ROWS) { return true; } return false; @@ -345,12 +367,35 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(SSAGraph *result, auto &prev_grad = vars.back(); op_handle->AddInput(prev_grad.get()); - auto var = new VarHandle(vars.size() - 1, i, og, p); + auto var = new VarHandle(vars.size(), i, og, p); vars.emplace_back(var); op_handle->AddOutput(var); } } +void MultiDevSSAGraphBuilder::InsertDataBalanceOp( + SSAGraph *result, const std::vector &datas) const { +#ifdef PADDLE_WITH_CUDA + result->ops_.emplace_back( + new DataBalanceOpHandle(local_scopes_, places_, nccl_ctxs_)); +#else + result->ops_.emplace_back(new DataBalanceOpHandle(local_scopes_, places_)); +#endif + auto *op_handle = result->ops_.back().get(); + for (size_t i = 0; i < places_.size(); ++i) { + auto &p = places_[i]; + SetCommunicationContext(op_handle, p); + for (const std::string &d_name : datas) { + auto &vars = result->vars_[i][d_name]; + PADDLE_ENFORCE(!vars.empty()); + op_handle->AddInput(vars.back().get()); + auto var = new VarHandle(vars.size(), i, d_name, p); + vars.emplace_back(var); + op_handle->AddOutput(var); + } + } +} + bool MultiDevSSAGraphBuilder::IsParameterGradientOnce( const std::string &og, std::unordered_set *og_has_been_broadcast) const { @@ -363,24 +408,23 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce( return is_pg_once; } -int MultiDevSSAGraphBuilder::GetOpDeviceID( - const std::vector> &var_name_on_devices, - const OpDesc &op) const { +int MultiDevSSAGraphBuilder::GetOpDeviceID(const OpDesc &op) const { if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) { return -1; } - int var_dev_id = -1; - for (auto &var_name : op.InputArgumentNames()) { - if (var_dev_id != -1) break; - for (size_t i = 0; i < var_name_on_devices.size(); ++i) { - if (var_name_on_devices[i].count(var_name)) { - var_dev_id = static_cast(i); - break; - } + for (auto &varname : op.InputArgumentNames()) { + int dev_id = GetVarDeviceID(varname); + if (dev_id != -1) { + return dev_id; } } - return var_dev_id; + return -1; +} + +int MultiDevSSAGraphBuilder::GetVarDeviceID(const std::string &varname) const { + auto got = var_name_on_devices_.find(varname); + return got == var_name_on_devices_.end() ? -1 : got->second; } void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const { @@ -442,13 +486,14 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(SSAGraph *result, op_handle->AddInput(prev_grad.get()); } auto &vars = result->vars_[dst_dev_id][og]; - auto var = - new VarHandle(vars.size() - 1, dst_dev_id, og, places_[dst_dev_id]); + auto var = new VarHandle(vars.size(), dst_dev_id, og, places_[dst_dev_id]); vars.emplace_back(var); op_handle->AddOutput(var); return var; } +// Find the first occurence of `prev_op_name` and make current `op` depend +// on it. void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op, const std::string &prev_op_name) const { for (auto &prev_op : result->ops_) { @@ -463,16 +508,70 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op, void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result, const OpDesc &op) const { - CreateComputationalOp(result, op, 0); + int op_dev_id = -1; + if (op.Type() == "split_byref" || op.Type() == "split_selected_rows") { + op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); + if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) { + op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames()); + for (auto &varname : op.InputArgumentNames()) { + var_name_on_devices_.emplace(varname, op_dev_id); + } + } + for (auto &varname : op.OutputArgumentNames()) { + var_name_on_devices_.emplace(varname, op_dev_id); + } + } else if (op.Type() == "concat") { + op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); + for (auto &varname : op.OutputArgumentNames()) { + var_name_on_devices_.emplace(varname, op_dev_id); + } + } else { + PADDLE_ENFORCE( + "the distribute training related op should be in [split_byref, " + "concat]."); + } + + PADDLE_ENFORCE(op_dev_id != -1, + "can not find right place for distributed op: %s", op.Type()); + + CreateComputationalOp(result, op, op_dev_id); if (op.Type() == "concat") { ConnectOp(result, result->ops_.back().get(), "fetch_barrier"); } } +// Create RPC related op handles that connects its in ops and out ops. void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result, const OpDesc &op) const { - result->ops_.emplace_back( - new RPCOpHandle(op, local_scopes_[0], op.Type(), places_[0])); + int op_dev_id = -1; + if (op.Type() == "send") { + op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]); + // the variable name which contains .block means it was splited by + // split_byref op + // so that we can balance the variable blocks to all the pserver + // instances. + if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce && + op.InputArgumentNames()[0].find(".block") == std::string::npos) { + op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames()); + for (auto &varname : op.InputArgumentNames()) { + var_name_on_devices_.emplace(varname, op_dev_id); + } + } + } else if (op.Type() == "recv") { + op_dev_id = GetAppropriateDeviceID(op.OutputArgumentNames()); + for (auto &varname : op.OutputArgumentNames()) { + var_name_on_devices_.emplace(varname, op_dev_id); + } + } else { + // send_barrier and fetch_barrier op can be scheduled on device 0 + op_dev_id = 0; + } + + PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s", + op.Type()); + + result->ops_.emplace_back(new RPCOpHandle(op, local_scopes_[op_dev_id], + op.Type(), places_[op_dev_id])); if (op.Type() == "send_barrier") { ConnectOp(result, result->ops_.back().get(), "send"); @@ -488,9 +587,7 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result, "send, send_barrier. recv, fetch_barrier]"); } - // TODO(Yancey1989): schedule rpc op on different place may - // increate throughput - CreateOpHandleIOs(result, op, 0); + CreateOpHandleIOs(result, op, op_dev_id); } bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const { diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h index 78581755fe..a964e02488 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.h +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h @@ -47,10 +47,11 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { #endif std::unique_ptr Build(const ProgramDesc &program) const override; + int GetVarDeviceID(const std::string &varname) const override; private: void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op, - size_t place_id) const; + size_t device_id) const; private: std::string loss_var_name_; @@ -96,21 +97,26 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder { const std::string &og, std::unordered_set *og_has_been_broadcast) const; - int GetOpDeviceID( - const std::vector> &var_name_on_devices, - const OpDesc &op) const; + int GetOpDeviceID(const OpDesc &op) const; void InsertAllReduceOp(SSAGraph *result, const std::string &og) const; + void InsertDataBalanceOp(SSAGraph *result, + const std::vector &datas) const; + void CreateBroadcastOp(SSAGraph *result, const std::string &p_name, size_t src_dev_id) const; - bool IsSparseGradient( - const std::unordered_map &all_vars, - const std::string &og) const; + bool IsSparseGradient(const std::string &og) const; + + size_t GetAppropriateDeviceID( + const std::vector &var_names) const; private: BuildStrategy strategy_; + mutable std::unordered_map all_vars_; + mutable std::unordered_map var_name_on_devices_; + mutable std::vector balance_vars_; void SetCommunicationContext(OpHandleBase *op_handle, const platform::Place &p) const; diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index f79565fe71..d80bdcf15d 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -11,8 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - #include "paddle/fluid/framework/details/op_handle_base.h" +#include namespace paddle { namespace framework { @@ -58,8 +58,10 @@ void OpHandleBase::Run(bool use_cuda) { void OpHandleBase::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) { #ifdef PADDLE_WITH_CUDA + PADDLE_ENFORCE_NOT_NULL(waited_ctx); if (platform::is_cpu_place(waited_ctx->GetPlace()) || events_.empty()) { for (auto &dev_ctx : dev_ctxes_) { + PADDLE_ENFORCE_NOT_NULL(dev_ctx.second); dev_ctx.second->Wait(); } } else { @@ -122,7 +124,6 @@ void OpHandleBase::RunAndRecordEvent(const std::function &callback) { #ifdef PADDLE_WITH_CUDA if (!events_.empty()) { // Use event std::function method = callback; - for (auto &p : dev_ctxes_) { method = [method, p, this]() { static_cast(p.second)->RecordEvent( diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h index fbd90a3296..6aec178831 100644 --- a/paddle/fluid/framework/details/op_handle_base.h +++ b/paddle/fluid/framework/details/op_handle_base.h @@ -13,9 +13,9 @@ // limitations under the License. #pragma once +#include #include #include - #include "paddle/fluid/framework/details/var_handle.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/macros.h" @@ -92,9 +92,7 @@ class OpHandleBase { std::vector inputs_; std::vector outputs_; - std::unordered_map - dev_ctxes_; + std::map dev_ctxes_; #ifdef PADDLE_WITH_CUDA std::unordered_map events_; diff --git a/paddle/fluid/framework/details/reduce_and_gather.h b/paddle/fluid/framework/details/reduce_and_gather.h index a6ffb37313..c0cd873a1d 100644 --- a/paddle/fluid/framework/details/reduce_and_gather.h +++ b/paddle/fluid/framework/details/reduce_and_gather.h @@ -54,8 +54,7 @@ struct ReduceLoDTensor { inline void GatherSelectedRows( const std::vector &src_selecte_rows_, const std::vector &in_places, - const std::unordered_map &dev_ctxes, + const std::map &dev_ctxes, const platform::Place &out_place, SelectedRows *dst_selecte_rows) { PADDLE_ENFORCE(!src_selecte_rows_.empty()); diff --git a/paddle/fluid/framework/details/ssa_graph_builder.h b/paddle/fluid/framework/details/ssa_graph_builder.h index 5fc12a44b5..18612c3c1b 100644 --- a/paddle/fluid/framework/details/ssa_graph_builder.h +++ b/paddle/fluid/framework/details/ssa_graph_builder.h @@ -30,6 +30,7 @@ class SSAGraphBuilder { SSAGraphBuilder() {} virtual ~SSAGraphBuilder() {} virtual std::unique_ptr Build(const ProgramDesc &program) const = 0; + virtual int GetVarDeviceID(const std::string &var_name) const = 0; DISABLE_COPY_AND_ASSIGN(SSAGraphBuilder); diff --git a/paddle/fluid/framework/details/ssa_graph_checker.h b/paddle/fluid/framework/details/ssa_graph_checker.h index 304b221e7e..331aa9d2b5 100644 --- a/paddle/fluid/framework/details/ssa_graph_checker.h +++ b/paddle/fluid/framework/details/ssa_graph_checker.h @@ -16,6 +16,8 @@ #include "paddle/fluid/framework/details/ssa_graph_builder.h" +#include + namespace paddle { namespace framework { namespace details { @@ -33,6 +35,10 @@ class SSAGraghBuilderWithChecker : public SSAGraphBuilder { return graph; } + int GetVarDeviceID(const std::string& var_name) const override { + return builder_->GetVarDeviceID(var_name); + } + bool IsValidGraph(const SSAGraph* graph) const; private: diff --git a/paddle/fluid/framework/details/ssa_graph_printer.h b/paddle/fluid/framework/details/ssa_graph_printer.h index b4c9001378..09b0333ef2 100644 --- a/paddle/fluid/framework/details/ssa_graph_printer.h +++ b/paddle/fluid/framework/details/ssa_graph_printer.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include "paddle/fluid/framework/details/ssa_graph_builder.h" namespace paddle { @@ -55,6 +56,10 @@ class SSAGraghBuilderWithPrinter : public SSAGraphBuilder { return graph; } + int GetVarDeviceID(const std::string& var_name) const override { + return builder_->GetVarDeviceID(var_name); + } + private: std::unique_ptr printer_; std::unique_ptr builder_; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc index 6c5098ce85..99b10254a7 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc @@ -96,10 +96,20 @@ FeedFetchList ThreadedSSAGraphExecutor::Run( auto cur_ready_vars = ready_vars.PopAll(1, &timeout); if (timeout) { + std::lock_guard l(exception_mu_); if (exception_) { - auto exp = *exception_; - exception_.reset(); - throw exp; + std::exception *exp = exception_.get(); + if (dynamic_cast(exp)) { + auto e = *static_cast(exp); + exception_.reset(); + throw e; + } else if (dynamic_cast(exp)) { + auto e = *static_cast(exp); + exception_.reset(); + throw e; + } else { + LOG(FATAL) << "Unknown exception."; + } } else { continue; } @@ -198,7 +208,14 @@ void ThreadedSSAGraphExecutor::RunOp( running_ops_--; ready_var_q->Extend(op->Outputs()); VLOG(10) << op << " " << op->Name() << "Signal posted"; + } catch (platform::EOFException ex) { + std::lock_guard l(exception_mu_); + // EOFException will not cover up existing EnforceNotMet. + if (exception_.get() == nullptr) { + exception_.reset(new platform::EOFException(ex)); + } } catch (platform::EnforceNotMet ex) { + std::lock_guard l(exception_mu_); exception_.reset(new platform::EnforceNotMet(ex)); } catch (...) { LOG(FATAL) << "Unknown exception catched"; diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h index 4a2075f1cc..c69e0487e2 100644 --- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h @@ -56,7 +56,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor { std::vector local_scopes_; std::vector places_; platform::DeviceContextPool fetch_ctxs_; - std::unique_ptr exception_; + std::mutex exception_mu_; + std::unique_ptr exception_; std::atomic running_ops_; void InsertPendingOp(std::unordered_map *pending_ops, diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index e15232a77b..84f67fafa1 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -20,9 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/reader.h" -#ifdef PADDLE_WITH_DISTRIBUTE -#include "paddle/fluid/operators/detail/grpc_client.h" -#endif +#include "paddle/fluid/operators/detail/macros.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" @@ -48,10 +46,16 @@ ExecutorPrepareContext::~ExecutorPrepareContext() { Executor::Executor(const platform::Place& place) : place_(place) {} #ifdef PADDLE_WITH_DISTRIBUTE -void Executor::Complete() { - ::paddle::operators::detail::RPCClient::GetInstance< - ::paddle::operators::detail::GRPCClient>() - ->SendComplete(); +void Executor::BeginPass() { + ::paddle::operators::distributed::RPCClient::GetInstance< + ::paddle::operators::distributed::GRPCClient>() + ->SendBeginPass(); +} + +void Executor::EndPass() { + ::paddle::operators::distributed::RPCClient::GetInstance< + ::paddle::operators::distributed::GRPCClient>() + ->SendEndPass(); } #endif @@ -295,13 +299,14 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, std::unique_ptr Executor::Prepare( const ProgramDesc& program, int block_id) { - auto* ctx = new ExecutorPrepareContext(program, block_id); + std::unique_ptr ctx( + new ExecutorPrepareContext(program, block_id)); PADDLE_ENFORCE_LT(static_cast(block_id), program.Size()); auto& block = program.Block(block_id); for (auto& op_desc : block.AllOps()) { ctx->ops_.push_back(OpRegistry::CreateOp(*op_desc)); } - return std::unique_ptr(ctx); + return ctx; } std::vector> Executor::Prepare( @@ -320,7 +325,8 @@ std::vector> Executor::Prepare( } void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, - bool create_local_scope, bool create_vars) { + bool create_local_scope, bool create_vars, + bool keep_kids) { Scope* local_scope = scope; if (create_vars) { if (create_local_scope) { @@ -343,12 +349,20 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, } } platform::DeviceContextPool::Instance().Get(place_)->Wait(); - if (create_vars && create_local_scope) { + if (local_scope != scope) { scope->DeleteScope(local_scope); } else { - // Delete the local scopes created in operators. - scope->DropKids(); + if (!keep_kids) { + // By default, we should delete all kid scopes after run executor because + // some operators may create local scope when running, such as while_op. + // But when while_op also create a local executor to run it's sub block, + // the sub scopes it created should not be dropped immediately, because + // while_grad_op will use some variables created during while_op run, so + // we need to keep the kids and wait for the outer executor to drop them. + scope->DropKids(); + } } + if (FLAGS_benchmark) { VLOG(2) << "-------------------------------------------------------"; VLOG(2) << "Memory used after deleting local scope: " @@ -406,6 +420,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) { } } } +#else + LOG(WARNING) + << "'MKLDNN' is not supported, Please re-compile with WITH_MKLDNN option"; #endif } diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 67a0761dac..563a4b2bb6 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -46,9 +46,14 @@ class Executor { #ifdef PADDLE_WITH_DISTRIBUTE /* - * Sending signal to pserver to mark current trainer stop. + * Sending signal to pserver to mark current pass started. */ - void Complete(); + void BeginPass(); + + /* + * Sending signal to pserver to mark current pass finished. + */ + void EndPass(); #endif /* @Brief @@ -78,7 +83,7 @@ class Executor { void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, bool create_local_scope = true, - bool create_vars = true); + bool create_vars = true, bool keep_kids = false); void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, std::map* feed_targets, diff --git a/paddle/fluid/framework/framework.proto b/paddle/fluid/framework/framework.proto index 68fcc104d4..2cf14bd371 100644 --- a/paddle/fluid/framework/framework.proto +++ b/paddle/fluid/framework/framework.proto @@ -27,6 +27,7 @@ enum AttrType { BOOLEANS = 7; BLOCK = 8; LONG = 9; + BLOCKS = 10; } // OpDesc describes an instance of a C++ framework::OperatorBase @@ -46,6 +47,7 @@ message OpDesc { repeated bool bools = 11; optional int32 block_idx = 12; optional int64 l = 13; + repeated int32 blocks_idx = 14; }; message Var { diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc index a56674cbe2..cba0064f38 100644 --- a/paddle/fluid/framework/lod_tensor.cc +++ b/paddle/fluid/framework/lod_tensor.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memory.h" @@ -51,8 +52,6 @@ std::ostream &operator<<(std::ostream &os, const LoD &lod) { } std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { - PADDLE_ENFORCE(t.type().hash_code() == typeid(float).hash_code()); - if (!platform::is_cpu_place(t.place())) { LoDTensor tt; framework::TensorCopy(t, platform::CPUPlace(), &tt); @@ -70,7 +69,13 @@ std::ostream &operator<<(std::ostream &os, const LoDTensor &t) { // only print first ten elements int64_t size = t.numel() < 10 ? t.numel() : 10; for (int64_t i = 0; i < size; ++i) { - os << t.data()[i] << " "; + if (IsType(t.type())) { + os << t.data()[i] << " "; + } else if (IsType(t.type())) { + os << t.data()[i] << " "; + } else { + PADDLE_THROW("LoDTensor data type not in [float, int64_t]"); + } } return os; @@ -85,6 +90,7 @@ std::string LoDToString(const LoD &lod) { LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE_LT(level, in.size()); + PADDLE_ENFORCE_LT(elem_begin, elem_end); PADDLE_ENFORCE_LT(elem_end, in[level].size()); LoD res; @@ -380,7 +386,7 @@ void LoDTensor::MergeLoDTensor( LoD new_lod = lod_tensors[0]->lod(); for (size_t i = 1; i < lod_tensors.size(); ++i) { auto *t = lod_tensors[i]; - PADDLE_ENFORCE_EQ(new_type.hash_code(), t->type().hash_code()); + PADDLE_ENFORCE_EQ(new_type, t->type()); PADDLE_ENFORCE_EQ(new_layout, t->layout()); PADDLE_ENFORCE_EQ(framework::product(new_dim) / new_dim[0], @@ -388,6 +394,7 @@ void LoDTensor::MergeLoDTensor( new_dim[0] += t->dims()[0]; auto &lod = t->lod(); + PADDLE_ENFORCE_EQ(new_lod.size(), lod.size()); for (size_t j = 0; j < lod.size(); ++j) { auto &sub_lod = new_lod[j]; auto &offset = sub_lod.back(); @@ -410,5 +417,38 @@ void LoDTensor::MergeLoDTensor( } } +LoD ConvertToLengthBasedLoD(const LoD &offset_lod) { + LoD length_lod; + length_lod.reserve(offset_lod.size()); + for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) { + std::vector level; + if (offset_lod[lvl].size() > 0) { + level.reserve(offset_lod[lvl].size() - 1); + } + for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) { + level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]); + } + length_lod.push_back(level); + } + return length_lod; +} + +LoD ConvertToOffsetBasedLoD(const LoD &length_lod) { + LoD offset_lod; + offset_lod.reserve(length_lod.size()); + for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) { + std::vector level; + level.reserve(length_lod[lvl].size() + 1); + size_t tmp = 0; + level.push_back(tmp); + for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) { + tmp += length_lod[lvl][idx]; + level.push_back(tmp); + } + offset_lod.push_back(level); + } + return offset_lod; +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 1159fee39b..4a2729373b 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer, extern std::vector ReadFromRecordIO( recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx); +/* + * Convert between length-based LoD and offset-based LoD. + * The implementation of LoDTensor class use offset-based LoD. + * However, we want to expose the more user-friendly length-based + * LoD to the Python side instead. + * + * Example: + * If offset_lod = [[0, 2, 3],[0, 3, 5, 9]] + * then length_lod = [[2, 1], [3, 2, 4]] + */ +LoD ConvertToLengthBasedLoD(const LoD& offset_lod); + +LoD ConvertToOffsetBasedLoD(const LoD& length_lod); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc index 2ceffc9331..38d3cd96d6 100644 --- a/paddle/fluid/framework/lod_tensor_test.cc +++ b/paddle/fluid/framework/lod_tensor_test.cc @@ -26,6 +26,20 @@ namespace paddle { namespace framework { +TEST(LoD, PrintLoDTensor) { + LoDTensor tensor1; + tensor1.mutable_data(platform::CPUPlace()); + tensor1.data()[0] = 0.2; + tensor1.data()[1] = 0.5; + LOG(INFO) << tensor1; + + LoDTensor tensor2; + tensor2.mutable_data(platform::CPUPlace()); + tensor2.data()[0] = 1; + tensor2.data()[1] = 2; + LOG(INFO) << tensor2; +} + TEST(LoD, data) { LoD lod{{0, 1, 2}}; lod.push_back({0, 2, 4, 5}); @@ -37,7 +51,7 @@ TEST(LoD, data) { } } -TEST(LodExpand, test) { +TEST(LoD, ExpandLoD) { LoD lod{{0, 2}}; LoDTensor tensor; tensor.set_lod(lod); @@ -228,6 +242,38 @@ TEST(LoD, CheckAbsLoD) { ASSERT_FALSE(CheckAbsLoD(abs_lod0)); } +TEST(LoD, ConvertToLengthBasedLoD) { + LoD offset_lod; + offset_lod.push_back(std::vector({0, 2})); + offset_lod.push_back(std::vector({0, 1, 3})); + offset_lod.push_back(std::vector({0, 2, 4, 5})); + + LoD length_lod = ConvertToLengthBasedLoD(offset_lod); + + LoD expected; + expected.push_back(std::vector({2})); + expected.push_back(std::vector({1, 2})); + expected.push_back(std::vector({2, 2, 1})); + + EXPECT_EQ(length_lod, expected); +} + +TEST(LoD, ConvertToOffsetBasedLoD) { + LoD length_lod; + length_lod.push_back(std::vector({2})); + length_lod.push_back(std::vector({1, 2})); + length_lod.push_back(std::vector({2, 2, 1})); + + LoD offset_lod = ConvertToOffsetBasedLoD(length_lod); + + LoD expected; + expected.push_back(std::vector({0, 2})); + expected.push_back(std::vector({0, 1, 3})); + expected.push_back(std::vector({0, 2, 4, 5})); + + EXPECT_EQ(offset_lod, expected); +} + template static void TestRecordIO() { LoDTensor tensor; diff --git a/paddle/fluid/framework/lod_tensor_test.cu b/paddle/fluid/framework/lod_tensor_test.cu index e3efbe4c46..b9950627ca 100644 --- a/paddle/fluid/framework/lod_tensor_test.cu +++ b/paddle/fluid/framework/lod_tensor_test.cu @@ -17,9 +17,9 @@ #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/assert.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" __global__ void test(size_t* a, int size) { diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index f92769192c..a190199f1c 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -211,6 +211,12 @@ void OpDesc::SetBlockAttr(const std::string &name, BlockDesc *block) { need_update_ = true; } +void OpDesc::SetBlocksAttr(const std::string &name, + std::vector blocks) { + this->attrs_[name] = blocks; + need_update_ = true; +} + void OpDesc::SetAttrMap( const std::unordered_map &attr_map) { attrs_ = attr_map; @@ -305,6 +311,13 @@ struct SetAttrDescVisitor : public boost::static_visitor { void operator()(const std::vector &v) const { VectorToRepeated(v, attr_->mutable_bools()); } + void operator()(const std::vector &v) const { + std::vector blocks_idx; + for (auto blk : v) { + blocks_idx.push_back(blk->ID()); + } + VectorToRepeated(blocks_idx, attr_->mutable_blocks_idx()); + } void operator()(BlockDesc *desc) const { attr_->set_block_idx(desc->ID()); } void operator()(int64_t v) const { attr_->set_l(v); } void operator()(boost::blank) const { PADDLE_THROW("Unexpected branch"); } diff --git a/paddle/fluid/framework/op_desc.h b/paddle/fluid/framework/op_desc.h index a02d3e2691..74dd8ec002 100644 --- a/paddle/fluid/framework/op_desc.h +++ b/paddle/fluid/framework/op_desc.h @@ -77,6 +77,8 @@ class OpDesc { void SetBlockAttr(const std::string &name, BlockDesc *block); + void SetBlocksAttr(const std::string &name, std::vector blocks); + Attribute GetAttr(const std::string &name) const; Attribute GetNullableAttr(const std::string &name) const; diff --git a/paddle/fluid/framework/op_info.cc b/paddle/fluid/framework/op_info.cc index f1261dee03..af75baa5c4 100644 --- a/paddle/fluid/framework/op_info.cc +++ b/paddle/fluid/framework/op_info.cc @@ -21,8 +21,8 @@ namespace framework { // a static local variable is already being initialized. // https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex OpInfoMap& OpInfoMap::Instance() { - static OpInfoMap* g_op_info_map = new OpInfoMap(); - return *g_op_info_map; + static OpInfoMap g_op_info_map; + return g_op_info_map; } } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_kernel_type.h b/paddle/fluid/framework/op_kernel_type.h index f51a184e7b..c59b232191 100644 --- a/paddle/fluid/framework/op_kernel_type.h +++ b/paddle/fluid/framework/op_kernel_type.h @@ -97,7 +97,7 @@ inline bool NeedTransformLayout(const DataLayout& l, const DataLayout& r) { return ret; } -inline bool TransFromNeeded(const OpKernelType& l, const OpKernelType& r) { +inline bool NeedTransform(const OpKernelType& l, const OpKernelType& r) { return (!platform::places_are_same_class(l.place_, r.place_)) || (l.data_type_ != r.data_type_) || NeedTransformLayout(l.data_layout_, r.data_layout_); diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index 43ab227a94..e7dfa608b4 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -76,6 +76,20 @@ class OpRegistry { template struct OpKernelRegistrarFunctor; +template +inline void RegisterKernelClass(const char* op_type, const char* library_type, + Func func) { + std::string library(library_type); + std::string data_layout = "ANYLAYOUT"; + if (library == "MKLDNN") { + data_layout = "MKLDNNLAYOUT"; + } + OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), + StringToDataLayout(data_layout), + StringToLibraryType(library_type)); + OperatorWithKernel::AllOpKernels()[op_type][key] = func; +} + template struct OpKernelRegistrarFunctor { using KERNEL_TYPE = @@ -83,16 +97,10 @@ struct OpKernelRegistrarFunctor { void operator()(const char* op_type, const char* library_type) const { using T = typename KERNEL_TYPE::ELEMENT_TYPE; - std::string library(library_type); - std::string data_layout = "ANYLAYOUT"; - if (library == "MKLDNN") { - data_layout = "MKLDNNLAYOUT"; - } - OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(), - StringToDataLayout(data_layout), - StringToLibraryType(library_type)); - OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); - + RegisterKernelClass( + op_type, library_type, [](const framework::ExecutionContext& ctx) { + KERNEL_TYPE().Compute(ctx); + }); constexpr auto size = std::tuple_size>::value; OpKernelRegistrarFunctor func; @@ -116,6 +124,47 @@ class OpKernelRegistrar : public Registrar { } }; +template +struct OpKernelRegistrarFunctorEx; + +template +class OpKernelRegistrarEx : public Registrar { + public: + explicit OpKernelRegistrarEx(const char* op_type, const char* library_type) { + OpKernelRegistrarFunctorEx + func; + func(op_type, library_type); + } +}; + +template +struct OpKernelRegistrarFunctorEx { + void operator()(const char* op_type, const char* library_type) const {} +}; + +template +struct OpKernelRegistrarFunctorEx { + using Functor = + typename std::tuple_element>::type; + using T = + typename std::tuple_element>::type; + + void operator()(const char* op_type, const char* library_type) const { + RegisterKernelClass(op_type, library_type, Functor()); + + constexpr auto size = + std::tuple_size>::value; + OpKernelRegistrarFunctorEx= size, I + 2, + DataTypeAndKernelType...> + func; + func(op_type, library_type); + } +}; + /** * check if MACRO is used in GLOBAL NAMESPACE. */ @@ -133,21 +182,15 @@ class OpKernelRegistrar : public Registrar { VarTypeInference InferShapeBase */ -#define REGISTER_OPERATOR(op_type, op_class, ...) \ - STATIC_ASSERT_GLOBAL_NAMESPACE( \ - __reg_op__##op_type, \ - "REGISTER_OPERATOR must be called in global namespace"); \ - class _OpClass_##op_type##_ : public op_class { \ - public: \ - DEFINE_OP_CLONE_METHOD(_OpClass_##op_type##_); \ - DEFINE_OP_CONSTRUCTOR(_OpClass_##op_type##_, op_class); \ - }; \ - static ::paddle::framework::OperatorRegistrar<_OpClass_##op_type##_, \ - ##__VA_ARGS__> \ - __op_registrar_##op_type##__(#op_type); \ - int TouchOpRegistrar_##op_type() { \ - __op_registrar_##op_type##__.Touch(); \ - return 0; \ +#define REGISTER_OPERATOR(op_type, op_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op__##op_type, \ + "REGISTER_OPERATOR must be called in global namespace"); \ + static ::paddle::framework::OperatorRegistrar \ + __op_registrar_##op_type##__(#op_type); \ + int TouchOpRegistrar_##op_type() { \ + __op_registrar_##op_type##__.Touch(); \ + return 0; \ } #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ @@ -174,6 +217,25 @@ class OpKernelRegistrar : public Registrar { #define REGISTER_OP_CPU_KERNEL(op_type, ...) \ REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) +#define REGISTER_OP_KERNEL_EX(op_type, library_type, place_class, ...) \ + STATIC_ASSERT_GLOBAL_NAMESPACE( \ + __reg_op_kernel_##op_type##_##library_type##__, \ + "REGISTER_OP_KERNEL_EX must be called in global namespace"); \ + static ::paddle::framework::OpKernelRegistrarEx \ + __op_kernel_registrar_##op_type##_##library_type##__(#op_type, \ + #library_type); \ + int TouchOpKernelRegistrar_##op_type##_##library_type() { \ + __op_kernel_registrar_##op_type##_##library_type##__.Touch(); \ + return 0; \ + } + +#define REGISTER_OP_CUDA_KERNEL_FUNCTOR(op_type, ...) \ + REGISTER_OP_KERNEL_EX(op_type, CUDA, ::paddle::platform::CUDAPlace, \ + __VA_ARGS__) + +#define REGISTER_OP_CPU_KERNEL_FUNCTOR(op_type, ...) \ + REGISTER_OP_KERNEL_EX(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__) + /** * Macro to mark what Operator and Kernel * we will use and tell the compiler to diff --git a/paddle/fluid/framework/op_registry_test.cc b/paddle/fluid/framework/op_registry_test.cc index 18b1649cc7..04996d7b09 100644 --- a/paddle/fluid/framework/op_registry_test.cc +++ b/paddle/fluid/framework/op_registry_test.cc @@ -193,15 +193,10 @@ TEST(OpRegistry, CustomChecker) { ASSERT_EQ(test_attr, 4); } -class CosineOpComplete : public paddle::framework::CosineOp { - public: - DEFINE_OP_CONSTRUCTOR(CosineOpComplete, paddle::framework::CosineOp); - DEFINE_OP_CLONE_METHOD(CosineOpComplete); -}; - TEST(OperatorRegistrar, Test) { paddle::framework::OperatorRegistrar< - CosineOpComplete, paddle::framework::CosineOpProtoAndCheckerMaker> + paddle::framework::CosineOp, + paddle::framework::CosineOpProtoAndCheckerMaker> reg("cos"); } diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 122ee1dab3..d1dc5fcd97 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -592,8 +592,7 @@ static void CheckTensorNANOrInf(const std::string& name, if (tensor.memory_size() == 0) { return; } - if (tensor.type().hash_code() != typeid(float).hash_code() && // NOLINT - tensor.type().hash_code() != typeid(double).hash_code()) { // NOLINT + if (!IsType(tensor.type()) && !IsType(tensor.type())) { return; } PADDLE_ENFORCE(!framework::TensorContainsInf(tensor), @@ -620,8 +619,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope, "There are no kernels which are registered in the %s operator.", type_); } - ExecutionContext ctx(*this, scope, *dev_ctx); - OpKernelMap& kernels = kernels_iter->second; // TODO(dzhwinter) : kernel fallback mechanism will be added when all the @@ -631,65 +628,54 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // Do selection // } - auto expected_kernel_key = this->GetExpectedKernelType(ctx); + auto expected_kernel_key = + this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx)); VLOG(3) << "expected_kernel_key:" << expected_kernel_key; auto kernel_iter = kernels.find(expected_kernel_key); +#ifdef PADDLE_WITH_MKLDNN + // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set + if (kernel_iter == kernels.end() && + expected_kernel_key.library_type_ == LibraryType::kMKLDNN) { + VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one"; + expected_kernel_key.library_type_ = LibraryType::kPlain; + expected_kernel_key.data_layout_ = DataLayout::kAnyLayout; + kernel_iter = kernels.find(expected_kernel_key); + } +#endif if (kernel_iter == kernels.end()) { PADDLE_THROW("op %s does not have kernel for %s", type_, KernelTypeToString(expected_kernel_key)); } - // do data transform - Scope& new_scope = scope.NewScope(); + // do data transformScope &transfer_scope; + std::vector transfered_inplace_vars; + auto* transfer_scope = + TryTransferData(scope, expected_kernel_key, &transfered_inplace_vars); - std::vector inplace_vars; - for (auto& var_name_item : this->Inputs()) { - for (auto& var_name : var_name_item.second) { - auto* var = scope.FindVar(var_name); - if (var && VarIsTensor(var)) { - auto* tensor_in = GetTensorFromVar(var); - if (tensor_in->IsInitialized()) { - auto kernel_type_for_var = this->GetKernelTypeForVar( - var_name_item.first, *tensor_in, expected_kernel_key); - if (TransFromNeeded(kernel_type_for_var, expected_kernel_key)) { - auto out_var_names = OutputVars(true); - if (std::find(out_var_names.begin(), out_var_names.end(), - var_name) != out_var_names.end()) { - inplace_vars.push_back(var_name); - } - VLOG(3) << "Transform Variable " << var_name << " from " - << kernel_type_for_var << " to " << expected_kernel_key; - auto* trans_var = new_scope.Var(var_name); - std::shared_ptr out(new Tensor); - DataTransform(expected_kernel_key, kernel_type_for_var, *tensor_in, - out.get()); - CopyVariableWithTensor(*var, *(out.get()), trans_var); - } - } - } - } + // exec scope is the scope that kernel actually executed on. + const Scope& exec_scope = + (transfer_scope == nullptr ? scope : *transfer_scope); + + if (!(expected_kernel_key.place_ == dev_ctx->GetPlace())) { + dev_ctx = pool.Get(expected_kernel_key.place_); } - auto* new_dev_ctx = pool.Get(expected_kernel_key.place_); - kernel_iter->second->Compute( - ExecutionContext(*this, new_scope, *new_dev_ctx)); + kernel_iter->second(ExecutionContext(*this, exec_scope, *dev_ctx)); - for (auto& var_name : inplace_vars) { - VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; - auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); - auto* transformed_tensor = GetTensorFromVar(new_scope.FindVar(var_name)); - original_tensor->ShareDataWith(*transformed_tensor); + if (!transfered_inplace_vars.empty()) { + // there is inplace variable has been transfered. + TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope); } /*For profiling/benchmark only*/ if (FLAGS_benchmark) { - new_dev_ctx->Wait(); + dev_ctx->Wait(); } if (FLAGS_check_nan_inf) { for (auto& vname : OutputVars(true)) { - auto* var = new_scope.FindVar(vname); + auto* var = exec_scope.FindVar(vname); if (var == nullptr) continue; if (var->IsType()) { CheckTensorNANOrInf(vname, var->Get()); @@ -697,6 +683,64 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } } } +void OperatorWithKernel::TransferInplaceVarsBack( + const Scope& scope, const std::vector& inplace_vars, + const Scope& transfer_scope) const { + for (auto& var_name : inplace_vars) { + VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; + auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); + auto* transformed_tensor = + GetTensorFromVar(transfer_scope.FindVar(var_name)); + original_tensor->ShareDataWith(*transformed_tensor); + } +} + +Scope* OperatorWithKernel::TryTransferData( + const Scope& scope, const OpKernelType& expected_kernel_key, + std::vector* transfered_inplace_vars) const { + Scope* new_scope = nullptr; + for (auto& var_name_item : Inputs()) { + for (auto& var_name : var_name_item.second) { + auto* var = scope.FindVar(var_name); + // Only tensor can be tranfer to another device. + if (var == nullptr || !VarIsTensor(var)) { + continue; + } + + auto* tensor_in = GetTensorFromVar(var); + if (!tensor_in->IsInitialized()) { + continue; + } + + auto kernel_type_for_var = GetKernelTypeForVar( + var_name_item.first, *tensor_in, expected_kernel_key); + + if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) { + continue; + } + + auto out_var_names = OutputVars(true); + if (std::find(out_var_names.begin(), out_var_names.end(), var_name) != + out_var_names.end()) { + transfered_inplace_vars->emplace_back(var_name); + } + + VLOG(3) << "Transform Variable " << var_name << " from " + << kernel_type_for_var << " to " << expected_kernel_key; + + if (new_scope == nullptr) { + new_scope = &scope.NewScope(); + } + + auto* trans_var = new_scope->Var(var_name); + Tensor out; + TransformData(expected_kernel_key, kernel_type_for_var, *tensor_in, &out); + SetTensorToVariable(*var, out, trans_var); + } + } + + return new_scope; +} proto::VarType::Type OperatorWithKernel::IndicateDataType( const ExecutionContext& ctx) const { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index b1d75d0d0f..1040eb882b 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -121,10 +121,6 @@ class OperatorBase { //! Get all outputs variable names virtual std::vector OutputVars(bool has_intermediate) const; - // Return a new operator instance, which is as same as this. - // Use unique_ptr to prevent caller forget to delete this pointer. - virtual std::unique_ptr Clone() const = 0; - protected: std::string type_; // NOTE: in case of OpGrad, inputs_ contains: @@ -145,37 +141,6 @@ class OperatorBase { const platform::Place& place) const = 0; }; -// Macro for define a clone method. -// If you are writing an kernel operator, `Clone` will be defined when you -// register it. i.e. `Clone` method is not needed to define by yourself. -#define DEFINE_OP_CLONE_METHOD(cls) \ - std::unique_ptr<::paddle::framework::OperatorBase> Clone() const final { \ - return std::unique_ptr<::paddle::framework::OperatorBase>(new cls(*this)); \ - } - -// Macro for define a default constructor for Operator. -// You can also use -// using PARENT_CLASS::PARENT_CLASS; -// to use parent's constructor. -#define DEFINE_OP_CONSTRUCTOR(cls, parent_cls) \ - cls(const std::string& type, \ - const ::paddle::framework::VariableNameMap& inputs, \ - const ::paddle::framework::VariableNameMap& outputs, \ - const paddle::framework::AttributeMap& attrs) \ - : parent_cls(type, inputs, outputs, attrs) {} - -class NOP : public OperatorBase { - public: - using OperatorBase::OperatorBase; - std::unique_ptr Clone() const override { - return std::unique_ptr(new NOP(*this)); - } - - private: - void RunImpl(const Scope& scope, - const platform::Place& place) const override {} -}; - class ExecutionContext { public: ExecutionContext(const OperatorBase& op, const Scope& scope, @@ -347,9 +312,9 @@ class OpKernel : public OpKernelBase { class OperatorWithKernel : public OperatorBase { public: + using OpKernelFunc = std::function; using OpKernelMap = - std::unordered_map, - OpKernelType::Hash>; + std::unordered_map; OperatorWithKernel(const std::string& type, const VariableNameMap& inputs, const VariableNameMap& outputs, const AttributeMap& attrs) @@ -384,6 +349,20 @@ class OperatorWithKernel : public OperatorBase { // same. proto::VarType::Type IndicateDataType(const ExecutionContext& ctx) const; void RunImpl(const Scope& scope, const platform::Place& place) const final; + + /** + * Transfer data from scope to a transfered scope. If there is no data need to + * be tranfered, it returns nullptr. + * + * * transfered_inplace_vars is a output vector. + */ + Scope* TryTransferData( + const Scope& scope, const OpKernelType& expected_kernel_key, + std::vector* transfered_inplace_vars) const; + + void TransferInplaceVarsBack(const Scope& scope, + const std::vector& inplace_vars, + const Scope& exec_scope) const; }; extern bool OpSupportGPU(const std::string& op_type); diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index 74043b5d79..ac9dd8245a 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace framework { @@ -247,26 +247,3 @@ TEST(OpKernel, multi_inputs) { auto op = paddle::framework::OpRegistry::CreateOp(op_desc); op->Run(scope, cpu_place); } - -class OperatorClone : public paddle::framework::OperatorBase { - public: - DEFINE_OP_CLONE_METHOD(OperatorClone); - OperatorClone(const std::string& type, - const paddle::framework::VariableNameMap& inputs, - const paddle::framework::VariableNameMap& outputs, - const paddle::framework::AttributeMap& attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - - private: - void RunImpl(const paddle::framework::Scope& scope, - const paddle::platform::Place& place) const override {} -}; - -TEST(Operator, Clone) { - paddle::framework::InitDevices(true); - OperatorClone a("ABC", paddle::framework::VariableNameMap{}, - paddle::framework::VariableNameMap{}, - paddle::framework::AttributeMap{}); - auto b = a.Clone(); - ASSERT_EQ(a.Type(), b->Type()); -} diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 9406c6155d..b53a6f43fb 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -110,7 +110,6 @@ ParallelExecutor::ParallelExecutor( // Step 3. Convert main_program to SSA form and dependency graph. Also, insert // ncclOp - details::SSAGraphBuilderFactory builder_factory( member_->places_, loss_var_name, params, member_->local_scopes_, build_strategy); @@ -122,9 +121,10 @@ ParallelExecutor::ParallelExecutor( #endif } + builder_ = builder_factory.Create(); member_->executor_.reset(new details::ThreadedSSAGraphExecutor( exec_strategy, member_->local_scopes_, places, - builder_factory.Create()->Build(main_program))); + builder_->Build(main_program))); member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor( exec_strategy, member_->local_scopes_, std::move(var_infos), @@ -133,10 +133,23 @@ ParallelExecutor::ParallelExecutor( void ParallelExecutor::BCastParamsToGPUs( const std::unordered_set &vars) const { - auto *main_scope = member_->local_scopes_[0]; + // the the initializing bcast, all vars would be bcast from device(0), + // otherwise + // bcast from the specified device. + bool initializing = builder_.get() == nullptr ? true : false; for (auto &var : vars) { - auto *main_var = main_scope->FindVar(var); + int var_dev_id = + builder_.get() == nullptr ? -1 : builder_->GetVarDeviceID(var); + if (!initializing && var_dev_id == -1) continue; + + framework::Variable *main_var = nullptr; + if (initializing) { + main_var = member_->local_scopes_[0]->FindVar(var); + } else { + main_var = member_->local_scopes_[var_dev_id]->FindVar(var); + } + if (main_var == nullptr || !main_var->IsType()) { continue; } @@ -151,7 +164,9 @@ void ParallelExecutor::BCastParamsToGPUs( for (size_t i = 0; i < member_->places_.size(); ++i) { auto place = member_->places_[i]; void *buffer; - if (i == 0) { + + if ((initializing && i == 0) || + (!initializing && static_cast(i) == var_dev_id)) { buffer = const_cast(main_tensor.data()); } else { auto local_scope = member_->local_scopes_[i]; @@ -168,8 +183,16 @@ void ParallelExecutor::BCastParamsToGPUs( platform::NCCLGroupGuard guard; for (size_t i = 0; i < member_->places_.size(); ++i) { auto &nccl_ctx = member_->nccl_ctxs_->at(member_->places_[i]); - platform::dynload::ncclBcast(buffers[i], numel, data_type, 0, - nccl_ctx.comm_, nccl_ctx.stream()); + if (initializing) { + platform::dynload::ncclBcast(buffers[i], numel, data_type, 0, + nccl_ctx.comm_, nccl_ctx.stream()); + } else { + if (var_dev_id >= 0) { + platform::dynload::ncclBcast(buffers[i], numel, data_type, + var_dev_id, nccl_ctx.comm_, + nccl_ctx.stream()); + } + } } member_->nccl_ctxs_->WaitAll(); } diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 5247e79064..058f83f07c 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -19,12 +19,14 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/details/execution_strategy.h" +#include "paddle/fluid/framework/details/multi_devices_graph_builder.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device_context.h" + namespace paddle { namespace framework { @@ -68,6 +70,7 @@ class ParallelExecutor { private: ParallelExecutorPrivate *member_; + std::unique_ptr builder_; }; } // namespace framework diff --git a/paddle/fluid/framework/reader.cc b/paddle/fluid/framework/reader.cc index 0b36f1116d..5897d320a8 100644 --- a/paddle/fluid/framework/reader.cc +++ b/paddle/fluid/framework/reader.cc @@ -13,29 +13,61 @@ // limitations under the License. #include "paddle/fluid/framework/reader.h" +#include namespace paddle { namespace framework { -ReaderBase::~ReaderBase() {} -FileReader::FileReader(const std::vector &dims) : dims_(dims) {} - -void FileReader::ReadNext(std::vector *out) { +void ReaderBase::ReadNext(std::vector *out) { + std::lock_guard lock(mu_); + PADDLE_ENFORCE_EQ(status_, ReaderStatus::kRunning); ReadNextImpl(out); - if (out->empty()) { - return; - } +} - PADDLE_ENFORCE_EQ(out->size(), dims_.size()); - for (size_t i = 0; i < dims_.size(); ++i) { - auto &actual = (*out)[i].dims(); - auto &expect = dims_[i]; +void ReaderBase::InsertDecoratedReader( + const std::shared_ptr &decorated_reader) { + std::lock_guard guard(mu_); + decorated_readers_.emplace_back(decorated_reader); +} - PADDLE_ENFORCE_EQ(actual.size(), expect.size()); - for (int j = 0; j < actual.size(); ++j) { - // PADDLE_ENFORCE(actual[i] == expect[i] || expect[i] == -1); +std::unordered_set ReaderBase::GetEndPoints() { + std::unordered_set result; + std::deque queue; + queue.emplace_back(this); + while (!queue.empty()) { // BFS search + auto *front = queue.front(); + queue.pop_front(); + if (front->decorated_readers_.empty()) { + result.emplace(front); + } else { + for (auto &reader : front->decorated_readers_) { + if (auto *reader_ptr = reader.lock().get()) { + queue.emplace_back(reader_ptr); + } + } } } + + return result; } + +void ReaderBase::Shutdown() { + std::lock_guard lock(mu_); + if (status_ != ReaderStatus::kStopped) { + ShutdownImpl(); + status_ = ReaderStatus::kStopped; + } +} + +void ReaderBase::Start() { + std::lock_guard lock(mu_); + if (status_ != ReaderStatus::kRunning) { + StartImpl(); + status_ = ReaderStatus::kRunning; + } +} + +ReaderBase::~ReaderBase() { Shutdown(); } + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h index 64d4ceab62..6c4432cb7a 100644 --- a/paddle/fluid/framework/reader.h +++ b/paddle/fluid/framework/reader.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include #include "paddle/fluid/framework/ddim.h" @@ -24,61 +25,116 @@ namespace paddle { namespace framework { +enum ReaderStatus { kRunning, kStopped }; + class ReaderBase { public: - virtual void ReadNext(std::vector* out) = 0; + void ReadNext(std::vector* out); + + void Shutdown(); - virtual void ReInit() = 0; + void Start(); + + // Return the readers which are the end of decorating chain. Basically + // they are readers just before read op. + std::unordered_set GetEndPoints(); virtual ~ReaderBase(); + + protected: + virtual void ReadNextImpl(std::vector* out) = 0; + + virtual void ShutdownImpl() {} + + virtual void StartImpl() {} + + ReaderStatus status_{kRunning}; + + mutable std::mutex mu_; + + private: + friend class DecoratedReader; + // These methods can be only invoked inside DecoratedReader to record the + // decorating chain. + void InsertDecoratedReader( + const std::shared_ptr& decorated_reader); + // A set of which readers that decorated this reader. + std::vector> decorated_readers_; }; -class DecoratedReader : public ReaderBase { +class DecoratedReader : public ReaderBase, + public std::enable_shared_from_this { public: explicit DecoratedReader(const std::shared_ptr& reader) : ReaderBase(), reader_(reader) { PADDLE_ENFORCE_NOT_NULL(reader_); } - void ReInit() override { reader_->ReInit(); } + void RegisterDecorateChain() { + reader_->InsertDecoratedReader(shared_from_this()); + } protected: - std::shared_ptr reader_; -}; - -class FileReader : public ReaderBase { - public: - explicit FileReader(const std::vector& dims); - - void ReadNext(std::vector* out) override; + void ShutdownImpl() override { reader_->Shutdown(); } - protected: - virtual void ReadNextImpl(std::vector* out) = 0; + void StartImpl() override { reader_->Start(); } - private: - std::vector dims_; + std::shared_ptr reader_; }; +// FileReader is just a conceptual class. +class FileReader : public ReaderBase {}; + // The ReaderHolder is used as reader' unified wrapper, // making it easier to access different type reader in Variables. class ReaderHolder { public: - void Reset(ReaderBase* reader) { reader_.reset(reader); } + template + void Reset(const std::shared_ptr& reader) { + auto reader_base = std::dynamic_pointer_cast(reader); + PADDLE_ENFORCE_NOT_NULL(reader_base); + reader_ = reader_base; + } - std::shared_ptr Get() const { return reader_; } + const std::shared_ptr& Get() const { return reader_; } void ReadNext(std::vector* out) { PADDLE_ENFORCE_NOT_NULL(reader_); reader_->ReadNext(out); } - void ReInit() { + + void ResetAll() { + auto end_readers = reader_->GetEndPoints(); + for (auto* reader : end_readers) { + reader->Shutdown(); + } + for (auto* reader : end_readers) { + reader->Start(); + } + } + + void Shutdown() { + PADDLE_ENFORCE_NOT_NULL(reader_); + reader_->Shutdown(); + } + + void Start() { PADDLE_ENFORCE_NOT_NULL(reader_); - reader_->ReInit(); + reader_->Start(); } + operator const std::shared_ptr&() const { return this->reader_; } + private: std::shared_ptr reader_; }; +template +inline std::shared_ptr MakeDecoratedReader(ARGS&&... args) { + std::shared_ptr reader(new T(std::forward(args)...)); + reader->RegisterDecorateChain(); + return reader; +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/reader_test.cc b/paddle/fluid/framework/reader_test.cc new file mode 100644 index 0000000000..f0d07cb7c1 --- /dev/null +++ b/paddle/fluid/framework/reader_test.cc @@ -0,0 +1,52 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/framework/reader.h" +#include +#include "gtest/gtest.h" + +class StubDecoratedReader : public paddle::framework::DecoratedReader { + public: + explicit StubDecoratedReader(const std::shared_ptr &reader) + : DecoratedReader(reader) {} + + void ReadNextImpl(std::vector *out) override {} +}; + +class StubRootReader : public paddle::framework::ReaderBase { + public: + void ReadNextImpl(std::vector *out) override {} +}; + +TEST(READER, decorate_chain) { + auto root = std::make_shared(); + auto end_point1 = + paddle::framework::MakeDecoratedReader(root); + auto end_point2 = + paddle::framework::MakeDecoratedReader(root); + + { + auto endpoints = root->GetEndPoints(); + ASSERT_EQ(endpoints.size(), 2U); + ASSERT_NE(endpoints.count(end_point1.get()), 0); + ASSERT_NE(endpoints.count(end_point2.get()), 0); + } + + { + auto end_point3 = + paddle::framework::MakeDecoratedReader(root); + ASSERT_EQ(root->GetEndPoints().size(), 3U); + } + { ASSERT_EQ(root->GetEndPoints().size(), 2U); } +} diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index bb2d866c82..50f374e370 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -43,48 +43,29 @@ Scope& Scope::NewScope() const { } Variable* Scope::Var(const std::string& name) { - // acquire the lock when new var under this scope std::unique_lock lock(mutex_); - auto* v = FindVarLocally(name); - if (v != nullptr) return v; - - v = new Variable(); - vars_[name].reset(v); - VLOG(3) << "Create variable " << name; - v->name_ = &(vars_.find(name)->first); - return v; + return VarInternal(name); } Variable* Scope::Var(std::string* name) { - auto var_name = string::Sprintf("%p.%d", this, vars_.size()); + std::unique_lock lock(mutex_); + auto new_name = string::Sprintf("%p.%d", this, vars_.size()); if (name != nullptr) { - *name = var_name; + *name = new_name; } - return Var(var_name); + return VarInternal(new_name); } Variable* Scope::FindVar(const std::string& name) const { - // acquire the lock when find var std::unique_lock lock(mutex_); return FindVarInternal(name); } -Variable* Scope::FindVarInternal(const std::string& name) const { - auto var = FindVarLocally(name); - if (var != nullptr) { - return var; - } - return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name); -} - const Scope* Scope::FindScope(const Variable* var) const { - for (auto& kv : vars_) { - if (kv.second.get() == var) { - return this; - } - } - return (parent_ == nullptr) ? nullptr : parent_->FindScope(var); + std::unique_lock lock(mutex_); + return FindScopeInternal(var); } + void Scope::DropKids() { std::unique_lock lock(mutex_); for (Scope* s : kids_) delete s; @@ -92,6 +73,7 @@ void Scope::DropKids() { } std::vector Scope::LocalVarNames() const { + std::unique_lock lock(mutex_); std::vector known_vars; known_vars.reserve(this->vars_.size()); for (auto& p : vars_) { @@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector& var_names) { void Scope::Rename(const std::string& origin_name, const std::string& new_name) const { + std::unique_lock lock(mutex_); + RenameInternal(origin_name, new_name); +} + +std::string Scope::Rename(const std::string& origin_name) const { + std::unique_lock lock(mutex_); + auto new_name = string::Sprintf("%p.%d", this, vars_.size()); + RenameInternal(origin_name, new_name); + return new_name; +} + +Variable* Scope::VarInternal(const std::string& name) { + auto* v = FindVarLocally(name); + if (v != nullptr) return v; + + v = new Variable(); + vars_[name].reset(v); + VLOG(3) << "Create variable " << name; + v->name_ = &(vars_.find(name)->first); + return v; +} + +const Scope* Scope::FindScopeInternal(const Variable* var) const { + for (auto& kv : vars_) { + if (kv.second.get() == var) { + return this; + } + } + return (parent_ == nullptr) ? nullptr : parent_->FindScope(var); +} + +void Scope::RenameInternal(const std::string& origin_name, + const std::string& new_name) const { auto origin_it = vars_.find(origin_name); PADDLE_ENFORCE(origin_it != vars_.end(), "Cannot find original variable with name %s", origin_name); @@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name, vars_.erase(origin_it); } -std::string Scope::Rename(const std::string& origin_name) const { - auto var_name = string::Sprintf("%p.%d", this, vars_.size()); - Rename(origin_name, var_name); - return var_name; +Variable* Scope::FindVarInternal(const std::string& name) const { + auto var = FindVarLocally(name); + if (var != nullptr) { + return var; + } + return (parent_ == nullptr) ? nullptr : parent_->FindVar(name); } Variable* Scope::FindVarLocally(const std::string& name) const { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index 95b4f7c5f6..e246241c0a 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -88,12 +88,20 @@ class Scope { // Call Scope::NewScope for a sub-scope. explicit Scope(Scope const* parent) : parent_(parent) {} + // Called by Var. + Variable* VarInternal(const std::string& name); + + // Called by FindScope. + const Scope* FindScopeInternal(const Variable* var) const; + + // Called by Rename. + void RenameInternal(const std::string& origin_name, + const std::string& new_name) const; + // Called by FindVar recursively. - // Caller doesn't own the returned Variable. Variable* FindVarInternal(const std::string& name) const; // Called by FindVarInternal and Var. - // Caller doesn't own the returned Variable. Variable* FindVarLocally(const std::string& name) const; // Scope in `kids_` are owned by this class. diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index 96114678a9..7f678f869a 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -23,9 +23,9 @@ namespace framework { template inline const T* Tensor::data() const { check_memory_size(); - PADDLE_ENFORCE(std::is_same::value || - holder_->type() == std::type_index(typeid(T)), - "Tensor holds the wrong type, it holds %s", + bool valid = std::is_same::value || + holder_->type() == std::type_index(typeid(T)); + PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s", this->holder_->type().name()); return reinterpret_cast( @@ -37,9 +37,9 @@ inline bool Tensor::IsInitialized() const { return holder_ != nullptr; } template inline T* Tensor::data() { check_memory_size(); - PADDLE_ENFORCE(std::is_same::value || - holder_->type() == std::type_index(typeid(T)), - "Tensor holds the wrong type, it holds %s", + bool valid = std::is_same::value || + holder_->type() == std::type_index(typeid(T)); + PADDLE_ENFORCE(valid, "Tensor holds the wrong type, it holds %s", this->holder_->type().name()); return reinterpret_cast(reinterpret_cast(holder_->ptr()) + offset_); diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index e5bc74755f..f98011e896 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -69,7 +69,22 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, PADDLE_ENFORCE(platform::is_gpu_place(ctx_place)); auto stream = reinterpret_cast(ctx).stream(); - memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream); + if (platform::is_same_place(src_place, dst_place)) { + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + } else { + if (platform::is_same_place(ctx_place, src_place)) { + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); + } else if (platform::is_same_place(ctx_place, dst_place)) { + platform::DeviceContextPool::Instance().Get(src.place())->Wait(); + memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, + stream); + } else { + PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place."); + } + } } #endif } @@ -78,10 +93,10 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst) { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); const platform::DeviceContext* dev_ctx; - if (platform::is_gpu_place(src.place())) { - dev_ctx = pool.Get(src.place()); - } else { + if (platform::is_gpu_place(dst_place)) { dev_ctx = pool.Get(dst_place); + } else { + dev_ctx = pool.Get(src.place()); } TensorCopy(src, dst_place, *dev_ctx, dst); } diff --git a/paddle/fluid/framework/tensor_util.h b/paddle/fluid/framework/tensor_util.h index dca279b693..4457382ade 100644 --- a/paddle/fluid/framework/tensor_util.h +++ b/paddle/fluid/framework/tensor_util.h @@ -23,10 +23,25 @@ limitations under the License. */ namespace paddle { namespace framework { +// NOTE(zcd): Because TensorCopy is an async operation, when the src_place +// and dst_place are two different GPU, to ensure that the operation can +// be carried out correctly, there is a src_ctx wait operation in TensorCopy. +// If ctx_place and src_place are the same, src_ctx.Wait() is added +// after memory::Copy; if ctx_place and dst_place are the same, +// src_ctx.Wait() is added before memory::Copy. void TensorCopy(const Tensor& src, const platform::Place& dst_place, const platform::DeviceContext& ctx, Tensor* dst); + +// NOTE(zcd): If the src.place() and dst_place are two different GPU, +// the copy operation is carried out on the dst_place's stream. This is +// very important, because TensorCopy is an async operator, and in most +// case, once this copy operator returns, dst is to be used in dst_place's +// stream, if this copy operation is carried out on the src_place's stream, +// when dst is used in dst_place's stream the copy operation may be +// not completed. void TensorCopy(const Tensor& src, const platform::Place& dst_place, Tensor* dst); + void TensorCopySync(const Tensor& src, const platform::Place& dst_place, Tensor* dst); diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 4879209ece..e099e40f12 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -35,7 +35,8 @@ using VariableNameMap = std::map>; using Attribute = boost::variant, std::vector, std::vector, bool, - std::vector, BlockDesc*, int64_t>; + std::vector, BlockDesc*, int64_t, + std::vector>; using AttributeMap = std::unordered_map; diff --git a/paddle/fluid/framework/var_type.h b/paddle/fluid/framework/var_type.h index 2b646d78f0..429997c8b8 100644 --- a/paddle/fluid/framework/var_type.h +++ b/paddle/fluid/framework/var_type.h @@ -24,18 +24,24 @@ limitations under the License. */ namespace paddle { namespace framework { + +template +bool IsType(const std::type_index& type_index) { + return type_index == std::type_index(typeid(T)); +} + inline proto::VarType::Type ToVarType(std::type_index type) { - if (type.hash_code() == typeid(LoDTensor).hash_code()) { + if (IsType(type)) { return proto::VarType_Type_LOD_TENSOR; - } else if (type.hash_code() == typeid(LoDRankTable).hash_code()) { + } else if (IsType(type)) { return proto::VarType_Type_LOD_RANK_TABLE; - } else if (type.hash_code() == typeid(LoDTensorArray).hash_code()) { + } else if (IsType(type)) { return proto::VarType_Type_LOD_TENSOR_ARRAY; - } else if (type.hash_code() == typeid(SelectedRows).hash_code()) { + } else if (IsType(type)) { return proto::VarType_Type_SELECTED_ROWS; - } else if (type.hash_code() == typeid(ReaderHolder).hash_code()) { + } else if (IsType(type)) { return proto::VarType_Type_READER; - } else if (type.hash_code() == typeid(ChannelHolder).hash_code()) { + } else if (IsType(type)) { return proto::VarType_Type_CHANNEL; } else { PADDLE_THROW("ToVarType:Unsupported type %s", type.name()); diff --git a/paddle/fluid/framework/var_type_inference_test.cc b/paddle/fluid/framework/var_type_inference_test.cc index 14b81ddfec..7842168f60 100644 --- a/paddle/fluid/framework/var_type_inference_test.cc +++ b/paddle/fluid/framework/var_type_inference_test.cc @@ -22,6 +22,17 @@ limitations under the License. */ namespace paddle { namespace framework { +class NOP : public OperatorBase { + public: + NOP(const std::string &type, const VariableNameMap &inputs, + const VariableNameMap &outputs, const AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + private: + void RunImpl(const Scope &scope, + const platform::Place &place) const override {} +}; + class SumOpMaker : public OpProtoAndCheckerMaker { public: void Make() { diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index ec16a1c600..1895aea7f9 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) +set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor ) # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal? cc_library(paddle_fluid_api @@ -28,9 +28,10 @@ endif() if(WITH_TESTING) # both tests/book and analysis depends the models that generated by python/paddle/fluid/tests/book add_subdirectory(tests/book) - add_subdirectory(analysis) endif() +add_subdirectory(analysis) + if (TENSORRT_FOUND) add_subdirectory(tensorrt) endif() diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt index 5083578444..cdd67fdc92 100644 --- a/paddle/fluid/inference/analysis/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/CMakeLists.txt @@ -1,23 +1,38 @@ -set(FLUID_CORE_MODULES proto_desc memory lod_tensor executor init) -cc_library(analysis SRCS dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc fluid_to_data_flow_graph_pass.cc - DEPS paddle_fluid) +cc_library(analysis SRCS pass_manager.cc dot.cc node.cc data_flow_graph.cc graph_traits.cc subgraph_splitter.cc + fluid_to_data_flow_graph_pass.cc + data_flow_graph_to_fluid_pass.cc + dfg_graphviz_draw_pass.cc + tensorrt_subgraph_pass.cc + tensorrt_subgraph_node_mark_pass.cc + analyzer.cc + helper.cc + DEPS framework_proto proto_desc) cc_test(test_node SRCS node_tester.cc DEPS analysis) cc_test(test_dot SRCS dot_tester.cc DEPS analysis) set(PYTHON_TESTS_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/tests) -cc_test(test_data_flow_graph SRCS data_flow_graph_tester.cc DEPS analysis ${FLUID_CORE_MODULES} paddle_fluid - ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) -set_tests_properties(test_data_flow_graph PROPERTIES DEPENDS test_word2vec) +function (inference_analysis_test TARGET) + if(WITH_TESTING) + set(options "") + set(oneValueArgs "") + set(multiValueArgs SRCS) + cmake_parse_arguments(analysis_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) -cc_test(test_subgraph_splitter - SRCS subgraph_splitter_tester.cc - DEPS analysis paddle_fluid tensor - ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) -set_tests_properties(test_subgraph_splitter PROPERTIES DEPENDS test_word2vec) + cc_test(${TARGET} + SRCS "${analysis_test_SRCS}" + DEPS analysis + ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model --fraction_of_gpu_memory_to_use=0.5) + set_tests_properties(${TARGET} PROPERTIES DEPENDS test_word2vec) + endif(WITH_TESTING) +endfunction(inference_analysis_test) -cc_test(test_dfg_graphviz_draw_pass - SRCS dfg_graphviz_draw_pass_tester.cc - DEPS analysis - ARGS --inference_model_dir=${PYTHON_TESTS_DIR}/book/word2vec.inference.model) -set_tests_properties(test_dfg_graphviz_draw_pass PROPERTIES DEPENDS test_word2vec) +inference_analysis_test(test_data_flow_graph SRCS data_flow_graph_tester.cc) +inference_analysis_test(test_data_flow_graph_to_fluid_pass SRCS data_flow_graph_to_fluid_pass_tester.cc) +inference_analysis_test(test_fluid_to_data_flow_graph_pass SRCS fluid_to_data_flow_graph_pass_tester.cc) +inference_analysis_test(test_subgraph_splitter SRCS subgraph_splitter_tester.cc) +inference_analysis_test(test_dfg_graphviz_draw_pass SRCS dfg_graphviz_draw_pass_tester.cc) +inference_analysis_test(test_tensorrt_subgraph_pass SRCS tensorrt_subgraph_pass_tester.cc) +inference_analysis_test(test_pass_manager SRCS pass_manager_tester.cc) +inference_analysis_test(test_tensorrt_subgraph_node_mark_pass SRCS tensorrt_subgraph_node_mark_pass_tester.cc) +inference_analysis_test(test_analyzer SRCS analyzer_tester.cc) diff --git a/paddle/fluid/inference/analysis/README.md b/paddle/fluid/inference/analysis/README.md new file mode 100644 index 0000000000..70adb4a974 --- /dev/null +++ b/paddle/fluid/inference/analysis/README.md @@ -0,0 +1,58 @@ +# Inference Analysis + +The `inference/analysis` module is used to analyze and optimize the inference program, +it references some philosophy from `LLVM/analysis`, +and make the various optimization features be pluggable and co-exist in a pipeline. + +We borrowed some concepts from LLVM, such as + +- [Pass](./pass.h)es to implement optimization that traverse the inference program, +- [DataFlowGraph](./data_flow_graph.h) to represent the data flow graph built from a program, +- [PassManager](./pass_manager.h) to manage a sequence of `Pass`es over a graph. + +There are some other basic concepts here + +- [Node](./node.h), the node in a `DataFlowGraph`, + - `Function`, the Operator in Fluid, + - `Value`, the Variable in Fluid; +- [Argument](./argument.h), the argument that treat as the input and output of all `Pass`es in the pipeline, + +## How it works + +The `inference/analysis` module make all the passes in a pipeline, and works in such way: + +1. Build a `DataFlowGraph` from a Fluid inference ProgramDesc, +2. Call the middle passes one by one, the same `DataFlowGraph` is passed across all the passes, +3. Transform a new ProgramDesc from the modified `DataFlowGraph`. + +The new optimization features can be added as an independent `Pass` and controlled by gflags, +each pass will generate unified debug information or visualization for better debugging. + +## Supported Passes + +### `FluidToDataFlowGraphPass` +Transform the fluid `ProgramDesc` to a `DataFlowGraph` to give an abstract representation for all the middle passes, +this should be the first pass of the pipeline. + +### `DataFlowGraphToFluidPass` +Generate a final `ProgramDesc` from a data flow graph, this should be the last pass of the pipeline. + +### `TensorRTSubgraphNodeMarkPass` +Mark the `Node` that are supported by TensorRT, +this pass will generate a visualization file which can be used for debugging. + +### `TensorRTSubGraphPass` +Split the sub-graph that are can be accelerated by TensorRT. + +### `DFG_GraphvizDrawPass` +This pass is just for debug, it will visualize the `DataFlowGraph` using the [graphviz](http://www.graphviz.org) tool. + +It can be used as a helper class that draws the modified graph after each pass. + +## Utilities + +There is some helper legacy/function/class for analysis. + +- [dot.h](./dot.h) give a easy to use interface for generating `DOT` codes, +- [graph_traits.h](./graph_traits.h) contains the interfaces of the graph traversal algorithms, it uses `iterator`to make the algorithms easy to share across different passes, +there are some implementations in [data_flow_graph.cc](./data_flow_graph.cc) , such as BFS and DFS.. diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc new file mode 100644 index 0000000000..a4625f008c --- /dev/null +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/analyzer.h" +#include +#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +DEFINE_bool(inference_analysis_enable_tensorrt_subgraph_engine, false, + "Enable subgraph to TensorRT engine for acceleration"); + +DEFINE_string(inference_analysis_graphviz_log_root, "./", + "Graphviz debuger for data flow graphs."); + +class DfgPassManagerImpl final : public DfgPassManager { + public: + DfgPassManagerImpl() { + // TODO(Superjomn) set the key with pass reprs. + AddPass("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass); + if (FLAGS_inference_analysis_enable_tensorrt_subgraph_engine) { + auto trt_teller = [](const Node* node) { + if (!node->IsFunction()) return false; + return static_cast(node)->func_type() == "mul"; + }; + AddPass("tensorrt-subgraph-marker", + new TensorRTSubgraphNodeMarkPass(trt_teller)); + AddPass("tensorrt-subgraph", new TensorRTSubGraphPass(trt_teller)); + } + AddPass("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass); + } + + std::string repr() const override { return "dfg-pass-manager"; } + std::string description() const override { return "DFG pass manager."; } + + private: + void AddPass(const std::string& name, Pass* pass) { + LOG(INFO) << "Adding pass " << name; + Register(name, pass); + AddGraphvizDebugerPass(pass); + } + + // Add the graphviz debuger pass if the parent pass has one. + void AddGraphvizDebugerPass(Pass* pass) { + auto* debuger_pass = pass->CreateGraphvizDebugerPass(); + if (debuger_pass) { + LOG(INFO) << " - register debug pass [" << debuger_pass->repr() << "]"; + Register(debuger_pass->repr(), debuger_pass); + } + } +}; + +Analyzer::Analyzer() { Register("manager1", new DfgPassManagerImpl); } + +void Analyzer::Run(Argument* argument) { + for (auto& x : data_) { + PADDLE_ENFORCE(x->Initialize(argument)); + x->RunAll(); + PADDLE_ENFORCE(x->Finalize()); + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h new file mode 100644 index 0000000000..e9e14fb194 --- /dev/null +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -0,0 +1,68 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +/* + * This file contains Analyzer, an class that exposed as a library that analyze + * and optimize + * Fluid ProgramDesc for inference. Similar to LLVM, it has multiple flags to + * control whether + * an process is applied on the program. + * + * The processes are called Passes in analysis, the Passes are placed in a + * pipeline, the first + * Pass is the FluidToDataFlowGraphPass which transforms a Fluid ProgramDesc to + * a data flow + * graph, the last Pass is DataFlowGraphToFluidPass which transforms a data flow + * graph to a + * Fluid ProgramDesc. The passes in the middle of the pipeline can be any Passes + * which take a + * node or data flow graph as input. + * + * The Analyzer can be used in two methods, the first is a executable file which + * can be used to + * pre-process the inference model and can be controlled by passing difference + * command flags; + * the other way is to compose inside the inference API as a runtime pre-process + * phase in the + * inference service. + */ + +#include +#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" + +namespace paddle { +namespace inference { +namespace analysis { + +// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this +// flag if not available. +DECLARE_bool(inference_analysis_enable_tensorrt_subgraph_engine); +DECLARE_string(inference_analysis_graphviz_log_root); + +class Analyzer : public OrderedRegistry { + public: + // Register all the pass-managers. + Analyzer(); + + void Run(Argument* argument); + + DISABLE_COPY_AND_ASSIGN(Analyzer); +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/contrib/tape/variable.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc similarity index 52% rename from paddle/contrib/tape/variable.cc rename to paddle/fluid/inference/analysis/analyzer_tester.cc index 5ec1612909..d7c1a72932 100644 --- a/paddle/contrib/tape/variable.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -12,22 +12,18 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/contrib/tape/variable.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" namespace paddle { -namespace tape { +namespace inference { +namespace analysis { -void Variable::InitializeVariable() { - LOG(INFO) << "Initialzing " << desc_.Name() << " as " << desc_.GetType(); - framework::proto::VarType::Type var_type = desc_.GetType(); - if (var_type == framework::proto::VarType::LOD_TENSOR) { - var_.GetMutable(); - } else if (var_type == framework::proto::VarType::SELECTED_ROWS) { - var_.GetMutable(); - } else { - PADDLE_THROW("Variable type %d is not in [LOD_TENSOR, SELECTED_ROWS]", - var_type); - } -} -} +TEST_F(DFG_Tester, main) { + Analyzer analyser; + analyser.Run(&argument); } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/argument.cc b/paddle/fluid/inference/analysis/argument.cc new file mode 100644 index 0000000000..cb0263d5d9 --- /dev/null +++ b/paddle/fluid/inference/analysis/argument.cc @@ -0,0 +1,15 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/argument.h" diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h new file mode 100644 index 0000000000..6d316f20bf --- /dev/null +++ b/paddle/fluid/inference/analysis/argument.h @@ -0,0 +1,58 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file defines the class Argument, which is the input and output of the + * analysis module. All the fields that needed either by Passes or PassManagers + * are contained in Argument. + * + * TODO(Superjomn) Find some way better to contain the fields when it grow too + * big. + */ + +#pragma once + +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/data_flow_graph.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * The argument definition of both Pass and PassManagers. + * + * All the fields should be registered here for clearness. + */ +struct Argument { + // The graph that process by the Passes or PassManagers. + std::unique_ptr main_dfg; + + // The original program desc. + std::unique_ptr origin_program_desc; + + // The processed program desc. + std::unique_ptr transformed_program_desc; +}; + +#define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) +#define ANALYSIS_ARGUMENT_CHECK_FIELD(field__) \ + if (UNLIKELY(!(field__))) { \ + LOG(ERROR) << "field " << #field__ << " should be set."; \ + return false; \ + } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph.cc b/paddle/fluid/inference/analysis/data_flow_graph.cc index 4220451e3c..d09bf3ed16 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph.cc @@ -14,12 +14,13 @@ limitations under the License. */ #include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/dot.h" +#include "paddle/fluid/inference/analysis/node.h" namespace paddle { namespace inference { namespace analysis { -// It is a better idea that the inputs and outputs of this graph is set manully +// It is a better idea that the inputs and outputs of this graph is set manually // before, but there must be a Pass that helps to prune the unnecessary ops that // do not contribute to the given targets, so in this pass, analysis and get the // inputs and outputs is OK. @@ -49,6 +50,25 @@ void DataFlowGraph::Build() { outputs.push_back(out); } } + + Clean(); +} + +void DataFlowGraph::Clean() { + for (auto &node : nodes.nodes()) { + std::unordered_set inlinks_set(node->inlinks.begin(), + node->inlinks.end()); + std::unordered_set outlinks_set(node->outlinks.begin(), + node->outlinks.end()); + if (inlinks_set.size() < node->inlinks.size()) { + LOG(INFO) << "Clean: node " << node->repr() << " prune duplicate inputs"; + node->inlinks.assign(inlinks_set.begin(), inlinks_set.end()); + } + if (outlinks_set.size() < node->outlinks.size()) { + LOG(INFO) << "Clean: node " << node->repr() << " prune duplicate inputs"; + node->outlinks.assign(outlinks_set.begin(), outlinks_set.end()); + } + } } std::string DataFlowGraph::DotString() const { @@ -57,19 +77,7 @@ std::string DataFlowGraph::DotString() const { // Add nodes for (size_t i = 0; i < nodes.size(); i++) { const Node &node = nodes.Get(i); - switch (node.type()) { - case Node::Type::kValue: - dot.AddNode(node.repr(), node.dot_attrs()); - break; - case Node::Type::kFunction: - dot.AddNode(node.repr(), node.dot_attrs()); - break; - case Node::Type::kFunctionBlock: - dot.AddNode(node.repr(), node.dot_attrs()); - break; - default: - PADDLE_THROW("unsupported Node type %d", static_cast(node.type())); - } + dot.AddNode(node.repr(), node.dot_attrs()); } // Add edges diff --git a/paddle/fluid/inference/analysis/data_flow_graph.h b/paddle/fluid/inference/analysis/data_flow_graph.h index 913e344d37..a4fefc83e0 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph.h +++ b/paddle/fluid/inference/analysis/data_flow_graph.h @@ -47,6 +47,10 @@ struct DataFlowGraph { // Output a DOT graph file for debug. std::string DotString() const; + + private: + // Remove duplicate edges and so on. + void Clean(); }; /* @@ -133,17 +137,24 @@ struct GraphTraits { // Extract the inputs and outputs of a graph. The inputs and outputs of a // sub-graph is the inputs nodes and output nodes that doesn't inside the // sub-graph. -std::pair< - std::vector, - std::vector< - Node *>> static ExtractInputAndOutputOfSubGraph(std::vector - &graph) { +static std::pair, std::vector> +ExtractInputAndOutputOfSubGraph(std::vector &graph) { // NOLINT std::unordered_set nodes(graph.begin(), graph.end()); std::unordered_set inputs; std::unordered_set outputs; + // Input a Value, check whether its inlink is in the subgraph. + auto inlink_in_subgraph = [&](Node *n) { + for (auto *in : n->inlinks) { + if (nodes.count(in)) return true; + } + return false; + }; for (auto &node : graph) { for (auto *in : node->inlinks) { - if (!nodes.count(in) && in->type() == Node::Type::kValue) { + // The Value that is written by nodes inside a sub-graph shouldn't be the + // input of the sub-graph. + if (!nodes.count(in) && in->type() == Node::Type::kValue && + !inlink_in_subgraph(in)) { inputs.insert(in); } } diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc new file mode 100644 index 0000000000..29ca008123 --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc @@ -0,0 +1,171 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include +#include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/op_desc.h" +#include "paddle/fluid/framework/proto_desc.h" +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +using framework::proto::ProgramDesc; + +std::vector ExtractParameters( + const std::vector>& nodes); + +bool DataFlowGraphToFluidPass::Initialize(Argument* argument) { + ANALYSIS_ARGUMENT_CHECK_FIELD(argument) + ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc) + PADDLE_ENFORCE(!argument->transformed_program_desc); + // The transformed_program_desc should inherit all the VarDesc and BlockDesc + // from the original program desc. The operators of the main block(the first + // block) should rewritten by data flow graph. + argument->transformed_program_desc.reset( + new ProgramDesc(*argument->origin_program_desc)); + argument->transformed_program_desc->mutable_blocks(framework::kRootBlockIndex) + ->clear_ops(); + desc_ = argument->transformed_program_desc.get(); + argument_ = argument; + return true; +} + +bool DataFlowGraphToFluidPass::Finalize() { return true; } + +void DataFlowGraphToFluidPass::Run(DataFlowGraph* graph) { + auto traits = GraphTraits(graph); + for (auto it = traits.nodes().begin(); it != traits.nodes().end(); ++it) { + if (it->deleted()) continue; + + switch (it->type()) { + case Node::Type::kFunction: { + LOG(INFO) << "add function " << it->repr(); + AddFluidOp(&(*it)); + } break; + case Node::Type::kFunctionBlock: { + LOG(INFO) << "add engine op " << it->repr() << " , " + << static_cast(&(*it))->subgraph.size(); + AddEngineOp(&(*it)); + } break; + default: + continue; + } + } +} + +void DataFlowGraphToFluidPass::AddFluidOp(Node* node) { + auto* ori_op = static_cast(node->pb_desc()); + // currently only the main block is analyzed. + auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex); + auto* op = main_block->add_ops(); + *op = *ori_op; // copy the attributes, by default, these will not be changed + // by analysis phrase. + // The inputs and outputs of the existing ops are not changed by tensorrt + // subgraph pass. + // NOTE It might be changed by other passes in the long run. +} + +void CreateTrtEngineOp(Node* node, const DataFlowGraph& graph, + const framework::proto::BlockDesc& block) { + static int counter{0}; + PADDLE_ENFORCE(node->IsFunctionBlock()); + framework::OpDesc desc; + auto* func = static_cast(node); + + // collect inputs + std::vector io; + for (auto* x : func->inlinks) { + io.push_back(x->name()); + } + desc.SetInput("Xs", io); + + // collect outputs + io.clear(); + for (auto* x : func->outlinks) { + io.push_back(x->name()); + } + desc.SetOutput("Ys", io); + + desc.SetType("tensorrt_engine"); + // Set attrs + SetAttr(desc.Proto(), "subgraph", block.SerializeAsString()); + SetAttr(desc.Proto(), "engine_unique_key", + "trt-" + std::to_string(counter++)); + SetAttr(desc.Proto(), "max_batch", 100); // TODO(Superjomn) add config latter + SetAttr(desc.Proto(), "max_workspace", + 1024); // TODO(Superjomn) add config latter + SetAttr(desc.Proto(), "parameters", ExtractParameters(graph.nodes.nodes())); + node->SetPbMsg(desc.Proto()->SerializeAsString()); +} + +std::vector ExtractParameters( + const std::vector>& nodes) { + std::vector parameters; + for (const auto& node : nodes) { + if (!node->IsValue()) continue; + PADDLE_ENFORCE(!node->pb_msg().empty(), "pb_msg should be set first"); + framework::proto::VarDesc var; + var.ParseFromString(node->pb_msg()); + if (var.persistable()) { + parameters.push_back(var.name()); + } + } + return parameters; +} + +void DataFlowGraphToFluidPass::AddEngineOp(Node* node) { + // TODO(Superjomn) Here need to expose some arguments for default setting. + PADDLE_ENFORCE(node->IsFunctionBlock()); + auto* block_node = static_cast(node); + framework::proto::BlockDesc proto; + framework::BlockDesc block_desc(nullptr, &proto); + // copy ops. + for (auto* node : block_node->subgraph) { + auto* op = block_desc.AppendOp(); + PADDLE_ENFORCE(!node->pb_msg().empty()); + op->Proto()->ParseFromString(node->pb_msg()); + } + CreateTrtEngineOp(node, *argument_->main_dfg, *block_desc.Proto()); + auto* main_block = desc_->mutable_blocks(framework::kRootBlockIndex); + auto* op = main_block->add_ops(); + PADDLE_ENFORCE(!node->pb_msg().empty(), "failed to set desc for block"); + op->ParseFromString(node->pb_msg()); +} + +namespace { +class DFG_DebuggerPass : public DFG_GraphvizDrawPass { + public: + using Config = DFG_GraphvizDrawPass::Config; + explicit DFG_DebuggerPass(const Config& config) + : DFG_GraphvizDrawPass(config) {} + + std::string repr() const override { return "dfg-to-fluid-debuger-pass"; } + + bool Finalize() override { return true; } +}; +} // namespace + +Pass* DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const { + return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( + FLAGS_inference_analysis_graphviz_log_root, + "data_flow_graph_to_fluid_graphviz_debugger")); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h new file mode 100644 index 0000000000..edc84b02ed --- /dev/null +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h @@ -0,0 +1,58 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +/* + * This file implements the transformation from fluid ProgramDesc to data flow + * graph. + */ + +#pragma once + +#include +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/data_flow_graph.h" +#include "paddle/fluid/inference/analysis/pass.h" + +namespace paddle { +namespace inference { +namespace analysis { +class DataFlowGraphToFluidPass final : public DataFlowGraphPass { + public: + DataFlowGraphToFluidPass() = default; + + bool Initialize(Argument *argument) override; + bool Finalize() override; + + void Run(DataFlowGraph *graph) override; + + std::string repr() const override { return "DFG to fluid"; } + std::string description() const override { + return "Transform a DFG to a Fluid ProgramDesc"; + } + + Pass *CreateGraphvizDebugerPass() const override; + + protected: + // Add a Fluid Op into the ProgramDesc. + void AddFluidOp(Node *node); + // Add a EngineOp into the ProgramDesc. + void AddEngineOp(Node *node); + + private: + framework::proto::ProgramDesc *desc_; + Argument *argument_; +}; +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc index dcee75cee5..d8fc5e580a 100644 --- a/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc +++ b/paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass_tester.cc @@ -27,13 +27,12 @@ namespace inference { namespace analysis { TEST_F(DFG_Tester, Test) { - framework::proto::ProgramDesc new_desc; DataFlowGraph graph; FluidToDataFlowGraphPass pass0; DataFlowGraphToFluidPass pass1; - pass0.Initialize(desc); - pass1.Initialize(&new_desc); + ASSERT_TRUE(pass0.Initialize(&argument)); + ASSERT_TRUE(pass1.Initialize(&argument)); pass0.Run(&graph); pass1.Run(&graph); diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc new file mode 100644 index 0000000000..a6f8548475 --- /dev/null +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.cc @@ -0,0 +1,59 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +int DFG_GraphvizDrawPass::counter_{0}; + +void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) { + auto content = Draw(graph); + auto dot_path = GenDotPath(); + std::ofstream file(dot_path); + file.write(content.c_str(), content.size()); + file.close(); + + auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png"; + std::string message; + LOG(INFO) << "draw to " << png_path; + ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message); +} + +std::string DFG_GraphvizDrawPass::Draw(DataFlowGraph *graph) { + Dot dot; + // Add nodes + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if (config_.display_deleted_node || !node.deleted()) { + dot.AddNode(node.repr(), node.dot_attrs()); + } + } + // Add edges + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if (!config_.display_deleted_node && node.deleted()) continue; + for (auto &in : node.inlinks) { + if (!config_.display_deleted_node && in->deleted()) continue; + dot.AddEdge(in->repr(), node.repr(), {}); + } + } + return dot.Build(); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h index 41d4475382..17445ab440 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/inference/analysis/dot.h" #include "paddle/fluid/inference/analysis/pass.h" namespace paddle { @@ -32,35 +33,44 @@ namespace analysis { */ class DFG_GraphvizDrawPass : public DataFlowGraphPass { public: - DFG_GraphvizDrawPass(const std::string& dir, const std::string& id) - : dir_(dir), id_(id) {} - - bool Initialize() override { return Pass::Initialize(); } - void Run(DataFlowGraph* graph) override { - auto content = Draw(graph); - std::ofstream file(GenDotPath()); - file.write(content.c_str(), content.size()); - file.close(); - LOG(INFO) << "draw dot to " << GenDotPath(); - } + struct Config { + Config(const std::string &dir, const std::string &id, + bool display_deleted_node = false) + : dir(dir), id(id), display_deleted_node(display_deleted_node) {} + + // The directory to store the .dot or .png files. + const std::string dir; + // The identifier for this dot file. + const std::string id; + // Whether to display deleted nodes, default false. + const bool display_deleted_node; + }; - bool Finalize() override { return Pass::Finalize(); } + explicit DFG_GraphvizDrawPass(const Config &config) : config_(config) {} - Pass* CreatePrinterPass(std::ostream& os, - const std::string& banner) const override { - return nullptr; + bool Initialize(Argument *argument) override { return true; } + void Run(DataFlowGraph *graph) override; + bool Finalize() override { return true; } + + std::string repr() const override { return "DFG graphviz drawer"; } + std::string description() const override { + return "Debug a DFG by draw with graphviz"; } - private: + protected: + // A counter to add a number prefix to the debugger image output so that they + // will sort in the triggered order. + static int counter_; + // Path of the dot file to output. std::string GenDotPath() const { - return dir_ + "/" + "graph_" + id_ + ".dot"; + return config_.dir + "/" + std::to_string(counter_++) + "-graph_" + + config_.id + ".dot"; } - std::string Draw(DataFlowGraph* graph) { return graph->DotString(); } + virtual std::string Draw(DataFlowGraph *graph); - std::string dir_; - std::string id_; + Config config_; }; } // namespace analysis diff --git a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc index 3fc1cc18b8..162455b9c4 100644 --- a/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc +++ b/paddle/fluid/inference/analysis/dfg_graphviz_draw_pass_tester.cc @@ -24,13 +24,14 @@ namespace inference { namespace analysis { TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { - auto dfg = ProgramDescToDFG(desc); - DFG_GraphvizDrawPass pass("./", "test"); - pass.Initialize(); + auto dfg = ProgramDescToDFG(*argument.origin_program_desc); + DFG_GraphvizDrawPass::Config config("./", "test"); + DFG_GraphvizDrawPass pass(config); + pass.Initialize(&argument); pass.Run(&dfg); // test content - std::ifstream file("./graph_test.dot"); + std::ifstream file("./0-graph_test.dot"); ASSERT_TRUE(file.is_open()); std::string line; @@ -38,6 +39,7 @@ TEST_F(DFG_Tester, dfg_graphviz_draw_pass_tester) { while (std::getline(file, line)) { no++; } + // DFG is sensitive to ProgramDesc, be careful to change the existing models. ASSERT_EQ(no, 82); } diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc index 9f67c989cc..e918622d74 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc @@ -15,25 +15,31 @@ limitations under the License. */ #include #include +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" namespace paddle { namespace inference { namespace analysis { -FluidToDataFlowGraphPass::FluidToDataFlowGraphPass() {} - -bool FluidToDataFlowGraphPass::Initialize() { return Pass::Initialize(); } - -bool FluidToDataFlowGraphPass::Initialize( - const framework::proto::ProgramDesc &desc) { - desc_ = &desc; +bool FluidToDataFlowGraphPass::Initialize(Argument *argument) { + ANALYSIS_ARGUMENT_CHECK_FIELD(argument); + ANALYSIS_ARGUMENT_CHECK_FIELD(argument->origin_program_desc); + PADDLE_ENFORCE(argument); + if (!argument->main_dfg) { + LOG(INFO) << "Init DFG"; + argument->main_dfg.reset(new DataFlowGraph); + } + desc_ = argument->origin_program_desc.get(); return true; } -bool FluidToDataFlowGraphPass::Finalize() { return Pass::Finalize(); } +bool FluidToDataFlowGraphPass::Finalize() { return true; } void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { + PADDLE_ENFORCE(graph); + PADDLE_ENFORCE(desc_); // insert vars std::unordered_map var2id; auto &main_block = desc_->blocks(framework::kRootBlockIndex); @@ -41,7 +47,8 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { const auto &var = main_block.vars(i); auto *v = graph->nodes.Create(Node::Type::kValue); v->SetName(var.name()); - v->SetExtraInfo(const_cast(static_cast(&var))); + v->SetPbDesc(const_cast(static_cast(&var))); + v->SetPbMsg(var.SerializeAsString()); var2id[var.name()] = v->id(); } for (int i = 0; i < main_block.ops_size(); i++) { @@ -51,7 +58,9 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { static_cast(o)->SetFuncType(op.type()); // Link to the original protobuf message's memory, make it easier to // generate from a data flow graph to fluid ProgramDesc. - o->SetExtraInfo(const_cast(static_cast(&op))); + o->SetPbDesc(const_cast(static_cast(&op))); + o->SetPbMsg(op.SerializeAsString()); + // set inputs and outputs // TODO(Superjomn) make sure the InputNames is the real variable name. for (int j = 0; j < op.inputs_size(); j++) { @@ -75,9 +84,20 @@ void FluidToDataFlowGraphPass::Run(DataFlowGraph *graph) { graph->Build(); } -Pass *FluidToDataFlowGraphPass::CreatePrinterPass( - std::ostream &os, const std::string &banner) const { - return nullptr; +namespace { +class DFG_DebuggerPass : public DFG_GraphvizDrawPass { + public: + using Config = DFG_GraphvizDrawPass::Config; + explicit DFG_DebuggerPass(const Config &config) + : DFG_GraphvizDrawPass(config) {} + std::string repr() const override { return "fluid-to-dfg-debuger-pass"; } + bool Finalize() override { return true; } +}; +} + +Pass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const { + return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config( + FLAGS_inference_analysis_graphviz_log_root, "fluid-to-dfg-debuger")); } } // namespace analysis diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h index 33517e57be..da8463b63b 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h @@ -34,15 +34,19 @@ namespace analysis { */ class FluidToDataFlowGraphPass final : public DataFlowGraphPass { public: - FluidToDataFlowGraphPass(); - bool Initialize() override; - bool Initialize(const framework::proto::ProgramDesc &desc) override; + FluidToDataFlowGraphPass() = default; + + bool Initialize(Argument *argument) override; bool Finalize() override; void Run(DataFlowGraph *graph) override; - Pass *CreatePrinterPass(std::ostream &os, - const std::string &banner) const override; + std::string repr() const override { return "fluid-to-data-flow-graph"; } + std::string description() const override { + return "transform a fluid ProgramDesc to a data flow graph."; + } + + Pass *CreateGraphvizDebugerPass() const override; private: framework::proto::ProgramDesc const *desc_; diff --git a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc index 817d32c92c..cbca5abdd5 100644 --- a/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc +++ b/paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass_tester.cc @@ -23,11 +23,11 @@ namespace analysis { TEST_F(DFG_Tester, Init) { FluidToDataFlowGraphPass pass; - pass.Initialize(); - pass.Initialize(desc); + pass.Initialize(&argument); DataFlowGraph graph; pass.Run(&graph); - ASSERT_GT(graph.nodes.size(), 0); + // Analysis is sensitive to ProgramDesc, careful to change the original model. + ASSERT_EQ(graph.nodes.size(), 37UL); pass.Finalize(); LOG(INFO) << '\n' << graph.DotString(); } diff --git a/paddle/fluid/inference/analysis/helper.cc b/paddle/fluid/inference/analysis/helper.cc new file mode 100644 index 0000000000..ca40c01fc5 --- /dev/null +++ b/paddle/fluid/inference/analysis/helper.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/helper.h" +#include "paddle/fluid/framework/framework.pb.h" + +namespace paddle { +namespace inference { +namespace analysis { + +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const std::string &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::STRING); + attr->set_s(data); +} +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const int &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::INT); + attr->set_i(data); +} +template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const int64_t &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::LONG); + attr->set_l(data); +} +template <> +void SetAttr>(framework::proto::OpDesc *op, + const std::string &name, + const std::vector &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::STRINGS); + for (const auto &s : data) { + attr->add_strings(s.c_str()); + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 58eb0e715c..f1064cd20f 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -14,10 +14,13 @@ limitations under the License. */ #pragma once +#include #include +#include #include #include +#include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/enforce.h" @@ -26,6 +29,10 @@ namespace paddle { namespace inference { namespace analysis { +template +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const T &data); + template int AccuDims(Vec &&vec, int size) { int res = 1; @@ -35,7 +42,7 @@ int AccuDims(Vec &&vec, int size) { return res; } -#define SET_TYPE(type__) dic_[typeid(type__).hash_code()] = #type__; +#define SET_TYPE(type__) dic_[std::type_index(typeid(type__))] = #type__; /* * Map typeid to representation. */ @@ -47,14 +54,14 @@ struct DataTypeNamer { template const std::string &repr() const { - auto x = typeid(T).hash_code(); + auto x = std::type_index(typeid(T)); PADDLE_ENFORCE(dic_.count(x), "unknown type for representation"); return dic_.at(x); } - const std::string &repr(size_t &hash) const { // NOLINT - PADDLE_ENFORCE(dic_.count(hash), "unknown type for representation"); - return dic_.at(hash); + const std::string &repr(const std::type_index &type) const { // NOLINT + PADDLE_ENFORCE(dic_.count(type), "unknown type for representation"); + return dic_.at(type); } private: @@ -62,11 +69,10 @@ struct DataTypeNamer { SET_TYPE(int); SET_TYPE(bool); SET_TYPE(float); + SET_TYPE(void *); } - std::unordered_map - dic_; + std::unordered_map dic_; }; #undef SET_TYPE @@ -92,7 +98,7 @@ template class OrderedRegistry { public: T *Register(const std::string &name, T *x) { - PADDLE_ENFORCE(!dic_.count(name)); + PADDLE_ENFORCE(!dic_.count(name), "duplicate key [%s]", name); dic_[name] = data_.size(); data_.emplace_back(std::unique_ptr(x)); return data_.back().get(); @@ -116,6 +122,20 @@ T &GetFromScope(const framework::Scope &scope, const std::string &name) { return *var->GetMutable(); } +static void ExecShellCommand(const std::string &cmd, std::string *message) { + char buffer[128]; + std::shared_ptr pipe(popen(cmd.c_str(), "r"), pclose); + if (!pipe) { + LOG(ERROR) << "error running command: " << cmd; + return; + } + while (!feof(pipe.get())) { + if (fgets(buffer, 128, pipe.get()) != nullptr) { + *message += buffer; + } + } +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/node.cc b/paddle/fluid/inference/analysis/node.cc index fe06052608..f2e918f3ff 100644 --- a/paddle/fluid/inference/analysis/node.cc +++ b/paddle/fluid/inference/analysis/node.cc @@ -20,6 +20,17 @@ namespace paddle { namespace inference { namespace analysis { +template <> +std::string &NodeAttr::As() { + if (data_.empty()) { + type_index_ = std::type_index(typeid(std::string)); + } + PADDLE_ENFORCE_EQ(type_index_, std::type_index(typeid(std::string))); + return data_; +} + +std::string &NodeAttr::String() { return As(); } + std::vector Value::dot_attrs() const { return std::vector({Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"), @@ -40,6 +51,9 @@ Node *NodeMap::Create(Node::Type type) { case Node::Type::kValue: nodes_.emplace_back(new Value); break; + case Node::Type::kFunctionBlock: + nodes_.emplace_back(new FunctionBlock); + break; default: PADDLE_THROW("Not supported node type."); } diff --git a/paddle/fluid/inference/analysis/node.h b/paddle/fluid/inference/analysis/node.h index 7972ca25c9..47e524bc5c 100644 --- a/paddle/fluid/inference/analysis/node.h +++ b/paddle/fluid/inference/analysis/node.h @@ -25,6 +25,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/inference/analysis/device.h" #include "paddle/fluid/inference/analysis/dot.h" #include "paddle/fluid/inference/analysis/helper.h" @@ -35,6 +36,44 @@ namespace analysis { class NodeMap; +// A helper class to maintain the status from Pass. +struct NodeAttr { + // NOTE T should be a primary type or a struct combined by several primary + // types. + // NOTE the STL containers should not use here. + // Some usages + // Attr attr; + // attr.Bool() = true; + + bool &Bool() { return As(); } + float &Float() { return As(); } + int32_t &Int32() { return As(); } + int64_t &Int64() { return As(); } + void *&Pointer() { return As(); } + std::string &String(); + + private: + template + T &As() { + // init storage in the first usage. + if (data_.empty()) { + VLOG(4) << "resize data to " << sizeof(T); + type_index_ = std::type_index(typeid(T)); + data_.resize(sizeof(T)); + } + PADDLE_ENFORCE(framework::IsType(type_index_), + "type not matched, origin is %s, want %s", + DataTypeNamer::Global().repr(type_index_), + DataTypeNamer::Global().repr()); + PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); + return *reinterpret_cast(&data_[0]); + } + + private: + std::string data_; + std::type_index type_index_{typeid(NodeAttr)}; +}; + /* * Node Representation. * @@ -50,8 +89,6 @@ class Node { Node() = default; - struct Attr; - // Cast to a subclass type, Function for example. template Subclass &As() { @@ -71,12 +108,20 @@ class Node { // Get an additional attribute and convert it to T data type. NOTE this will // silently create a new attribute if not exists. - Attr &attr(const std::string &name) { return attrs_[name]; } + NodeAttr &attr(const std::string &name) const { return attrs_[name]; } int id() const { return id_; } - bool deleted() const { return deleted_; } + // The Protobuf description is set/get with a void* to decouple Node interface + // from a specific kind of Protobuf message. + void SetPbDesc(void *pb) { attr("pb_desc").Pointer() = pb; } + void *pb_desc() const { return attr("pb_desc").Pointer(); } + + void SetPbMsg(const std::string &s) { attr("pb_msg").String() = s; } + const std::string &pb_msg() const { return attr("pb_msg").String(); } + void SetDeleted() { deleted_ = true; } + bool deleted() const { return deleted_; } void SetName(const std::string &name) { name_ = name; } const std::string &name() const { return name_; } @@ -84,52 +129,12 @@ class Node { void SetType(Type type) { type_ = type; } Type type() const { return type_; } - void *extra_info() const { return extra_info_; } - void SetExtraInfo(void *extra_info) { extra_info_ = extra_info; } - // Input links. std::vector inlinks; // Output links. std::vector outlinks; - // A helper class to maintain the status from Pass. - // TODO(superjomn) add a checker here to ensure the T is primary. - struct Attr { - // NOTE T should be a primary type or a struct combined by several primary - // types. - // NOTE the STL containers should not use here. - // Some usages - // Attr attr; - // T data; - // attr.data.assign((char*)data, sizeof(data)); - - bool &Bool() { return As(); } - float &Float() { return As(); } - int32_t &Int32() { return As(); } - int64_t &Int64() { return As(); } - - private: - template - T &As() { - // init storage in the first usage. - if (data_.empty()) { - VLOG(4) << "resize data to " << sizeof(T); - type_hash_ = typeid(T).hash_code(); - data_.resize(sizeof(T)); - } - PADDLE_ENFORCE(type_hash_ == typeid(T).hash_code(), - "type not matched, origin is %s, want %s", - DataTypeNamer::Global().repr(type_hash_), - DataTypeNamer::Global().repr()); - PADDLE_ENFORCE_EQ(data_.size(), sizeof(T), "Node attr type recast error"); - return *reinterpret_cast(&data_[0]); - } - - private: - std::string data_; - size_t type_hash_{std::numeric_limits::max()}; - }; - + // Type checks. bool IsFunction() const { return type_ == Node::Type::kFunction; } bool IsValue() const { return type_ == Node::Type::kValue; } bool IsFunctionBlock() const { return type_ == Node::Type::kFunctionBlock; } @@ -148,10 +153,7 @@ class Node { Type type_{Type::kNone}; // Mark this node is deleted by some pass. bool deleted_{false}; - - void *extra_info_; - - mutable std::unordered_map attrs_; + mutable std::unordered_map attrs_; }; class Function; @@ -214,6 +216,10 @@ class Function : public Node { struct FunctionBlock : public Node { std::string repr() const override { return "block-" + std::to_string(id()); } std::vector subgraph; + + protected: + FunctionBlock() { SetType(Node::Type::kFunctionBlock); } + friend class NodeMap; }; class NodeMap { @@ -228,7 +234,7 @@ class NodeMap { void Delete(size_t id); - const std::vector> &nodes() { return nodes_; } + const std::vector> &nodes() const { return nodes_; } size_t size() const { return nodes_.size(); } diff --git a/paddle/fluid/inference/analysis/node_attr_flags.h b/paddle/fluid/inference/analysis/node_attr_flags.h new file mode 100644 index 0000000000..a3f70e5419 --- /dev/null +++ b/paddle/fluid/inference/analysis/node_attr_flags.h @@ -0,0 +1,32 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file contains all the flags that declared in Node::Attr. + * + * The Node::Attr is designed to share information between different passes, one + * can get other's attributes in a Node by the flags in this file. + */ +#pragma once +namespace paddle { +namespace inference { +namespace analysis { + +#define DECLARE_NODE_ATTR(flag__) const char ATTR_##flag__[] = #flag__; + +DECLARE_NODE_ATTR(supported_by_tensorrt) // bool + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass.h b/paddle/fluid/inference/analysis/pass.h index aa0e8667b5..6b4dbb3bb5 100644 --- a/paddle/fluid/inference/analysis/pass.h +++ b/paddle/fluid/inference/analysis/pass.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/inference/analysis/argument.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/node.h" @@ -30,19 +31,11 @@ namespace analysis { class Pass { public: Pass() = default; - virtual ~Pass() {} - // Virtual method overridden by subclasses to do only necessary initialization - // before any pass is run. - virtual bool Initialize() { return false; } - // There is some passes such as FlowToDataFlowGraphPass that needs a - // ProgramDesc. Here use the native ProgramDesc ProtoBuf message, so that it - // only couple with the proto file. - virtual bool Initialize(const framework::proto::ProgramDesc &desc) { - return false; - } - // There are some Passes such as DataFlowGraphToFluidPass that will output a - // ProgramDesc. - virtual bool Initialize(framework::proto::ProgramDesc *desc) { return false; } + virtual ~Pass() = default; + // Mutable Pass. + virtual bool Initialize(Argument *argument) { return false; } + // Readonly Pass. + virtual bool Initialize(const Argument &argument) { return false; } // Virtual method overriden by subclasses to do any necessary clean up after // all passes have run. @@ -50,7 +43,12 @@ class Pass { // Get a Pass appropriate to print the Node this pass operates on. virtual Pass *CreatePrinterPass(std::ostream &os, - const std::string &banner) const = 0; + const std::string &banner) const { + return nullptr; + } + + // Create a debugger Pass that draw the DFG by graphviz toolkit. + virtual Pass *CreateGraphvizDebugerPass() const { return nullptr; } // Run on a single Node. virtual void Run(Node *x) { LOG(FATAL) << "not valid"; } @@ -60,6 +58,11 @@ class Pass { virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; } // Run on a single DataFlowGraph. virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; } + + // Human-readable short representation. + virtual std::string repr() const = 0; + // Human-readable long description. + virtual std::string description() const = 0; }; // NodePass process on any Node types. diff --git a/paddle/fluid/inference/analysis/pass_manager.cc b/paddle/fluid/inference/analysis/pass_manager.cc new file mode 100644 index 0000000000..b428bb22b1 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass_manager.cc @@ -0,0 +1,56 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/analysis/pass_manager.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +bool PassManager::Initialize(Argument* argument) { + argument_ = argument; + for (auto& pass : data_) { + LOG(INFO) << "Initializing pass " << pass->repr(); + if (!pass->Initialize(argument)) { + LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]"; + return false; + } + } + return true; +} + +void DfgPassManager::RunAll() { + PADDLE_ENFORCE(argument_); + for (auto& pass : data_) { + VLOG(4) << "Running pass [" << pass->repr() << "]"; + pass->Run(argument_->main_dfg.get()); + } +} + +void NodePassManager::RunAll() { + PADDLE_ENFORCE(argument_); + PADDLE_ENFORCE(argument_->main_dfg.get()); + auto trait = + GraphTraits(argument_->main_dfg.get()).nodes_in_DFS(); + for (auto& node : trait) { + for (auto& pass : data_) { + pass->Run(&node); + } + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager.h b/paddle/fluid/inference/analysis/pass_manager.h new file mode 100644 index 0000000000..81a17e0287 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass_manager.h @@ -0,0 +1,106 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +/* + * This file defines the logic of pass management. The analysis for inference is + * a pipeline of Passes, a PassManager is a agency that helps to manage the + * executation of the Passes. + * + * There are two modes of Passes, the first one is called NodePass and takes + * an Node as input and output; the second one is called DFGPass and takes a + * DFG(Data Flow Graph) as input and output. It is hard to put all the passes in + * the same pipeline, there are two kinds of PassManagers, both takes a DFG as + * input and output a DFG, but the Passes inside are different: + * + * 1. NodePassManager: the passes inside are all NodePasses, it can have + * different graph trivial algorithm, for example, DFS_NodePassManager will + * trigger the passes in depth first order; + * 2. DfgPassManager: the passes inside are all DfgPasses. + */ + +#pragma once + +#include +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/inference/analysis/pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * PassManager is the base class for all pass managers, a pass manager has + * several Pass-es registered, and execute them in the linear order. + */ +class PassManager : public OrderedRegistry { + public: + PassManager() = default; + // Call all the passes' Initialize methods. The desc and data_flow_graph are + // globally shared, so pass them as the arguemnts for all the pass managers. + virtual bool Initialize(const Argument& argument) { return false; } + + virtual bool Initialize(Argument* argument); + + // Call all the passes' Finalize methods. + virtual bool Finalize() { + for (auto& pass : data_) { + if (!pass->Finalize()) { + LOG(ERROR) << "Failed to finalize pass [" << pass->repr() << "]"; + return false; + } + } + return true; + } + + // Run all the passes. + virtual void RunAll() = 0; + + // Short identifier. + virtual std::string repr() const = 0; + // Long description. + virtual std::string description() const = 0; + + virtual ~PassManager() = default; + + protected: + Argument* argument_{nullptr}; +}; + +/* + * A pass manager that process a DFG. + */ +class DfgPassManager : public PassManager { + public: + DfgPassManager() = default; + + void RunAll() override; + + virtual ~DfgPassManager() = default; +}; + +/* + * A pass manager that process a Node each time. + */ +class NodePassManager : public PassManager { + public: + NodePassManager() = default; + + void RunAll() override; + + virtual ~NodePassManager() = default; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/pass_manager_tester.cc b/paddle/fluid/inference/analysis/pass_manager_tester.cc new file mode 100644 index 0000000000..dac1c509d7 --- /dev/null +++ b/paddle/fluid/inference/analysis/pass_manager_tester.cc @@ -0,0 +1,86 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include + +#include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" +#include "paddle/fluid/inference/analysis/pass_manager.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +class TestDfgPassManager final : public DfgPassManager { + public: + TestDfgPassManager() = default; + virtual ~TestDfgPassManager() = default; + // Short identifier. + std::string repr() const override { return "test-pass-manager"; } + // Long description. + std::string description() const override { return "test doc"; } +}; + +class TestNodePassManager final : public NodePassManager { + public: + virtual ~TestNodePassManager() = default; + + std::string repr() const override { return "test-node-pass-manager"; } + std::string description() const override { return "test doc"; } +}; + +class TestNodePass final : public NodePass { + public: + virtual ~TestNodePass() = default; + + bool Initialize(Argument* argument) override { return true; } + + void Run(Node* node) override { + LOG(INFO) << "- Processing node " << node->repr(); + } + + std::string repr() const override { return "test-node"; } + std::string description() const override { return "some doc"; } +}; + +TEST_F(DFG_Tester, DFG_pass_manager) { + TestDfgPassManager manager; + DFG_GraphvizDrawPass::Config config("./", "dfg.dot"); + + manager.Register("fluid-to-flow-graph", new FluidToDataFlowGraphPass); + manager.Register("graphviz", new DFG_GraphvizDrawPass(config)); + manager.Register("dfg-to-fluid", new DataFlowGraphToFluidPass); + + ASSERT_TRUE(&argument); + ASSERT_TRUE(manager.Initialize(&argument)); + manager.RunAll(); +} + +TEST_F(DFG_Tester, Node_pass_manager) { + // Pre-process: initialize the DFG with the ProgramDesc first. + FluidToDataFlowGraphPass pass0; + pass0.Initialize(&argument); + pass0.Run(argument.main_dfg.get()); + + TestNodePassManager manager; + manager.Register("test-node-pass", new TestNodePass); + ASSERT_TRUE(manager.Initialize(&argument)); + manager.RunAll(); +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/subgraph_splitter.cc b/paddle/fluid/inference/analysis/subgraph_splitter.cc index 43ccac96c8..389f9e1a91 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter.cc +++ b/paddle/fluid/inference/analysis/subgraph_splitter.cc @@ -119,10 +119,12 @@ void SubGraphFuse::operator()() { ReplaceNodesWithSubGraphs(); } void SubGraphFuse::ReplaceNodesWithSubGraphs() { auto subgraphs = SubGraphSplitter(graph_, node_inside_subgraph_teller_)(); for (auto &subgraph : subgraphs) { + std::unordered_set subgraph_uniq(subgraph.begin(), subgraph.end()); // replace this sub-graph with the first node. Two steps: 1. Create a Block // Node that contains this subgraph 2. Mark the nodes inside the sub-graph // as deleted. 3. Replace the deleted node with the new Block Node. - auto *block_node = graph_->nodes.Create(Node::Type::kFunctionBlock); + auto *block_node = static_cast( + graph_->nodes.Create(Node::Type::kFunctionBlock)); auto io = ExtractInputAndOutputOfSubGraph(subgraph); block_node->inlinks = std::move(io.first); block_node->outlinks = std::move(io.second); @@ -130,21 +132,25 @@ void SubGraphFuse::ReplaceNodesWithSubGraphs() { // TODO(Superjomn) need a unified mechanism to treat deleted node in each // pass. node->SetDeleted(); + block_node->subgraph.push_back(node); } - std::unordered_map - delelte_node_map; // deleted node to BlockNode - for (auto *n : block_node->inlinks) { - n->inlinks.clear(); - } - for (auto *n : block_node->outlinks) { - n->outlinks.clear(); - } - for (auto *n : block_node->inlinks) { - n->outlinks.push_back(block_node); + // Change all the sub-graph's inputs and outputs corresponding inlink and + // outlink to this sub-graph node. + auto inlink_or_outlink_cleaner = [&](std::vector &nodes) { + for (auto *&n : nodes) { + if (subgraph_uniq.count(n)) { + n = block_node; + } + } + std::unordered_set uniq(nodes.begin(), nodes.end()); + nodes.assign(uniq.begin(), uniq.end()); + }; + for (auto *i : block_node->inlinks) { + inlink_or_outlink_cleaner(i->outlinks); } - for (auto *n : block_node->outlinks) { - n->inlinks.push_back(n); + for (auto *&o : block_node->outlinks) { + inlink_or_outlink_cleaner(o->inlinks); } } } diff --git a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc index 0644c0db12..67dd4da54b 100644 --- a/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc +++ b/paddle/fluid/inference/analysis/subgraph_splitter_tester.cc @@ -19,22 +19,23 @@ namespace paddle { namespace inference { namespace analysis { +SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) { + if (node->type() != Node::Type::kFunction) return false; + const auto* func = static_cast(node); + if (func->func_type() == "elementwise_add" || func->func_type() == "relu" || + func->func_type() == "conv2d" || func->func_type() == "mul" || + func->func_type() == "sigmoid" || func->func_type() == "softmax") { + LOG(INFO) << "sub-graph marked " << node->repr(); + return true; + } + return false; +}; + TEST_F(DFG_Tester, Split) { auto desc = LoadProgramDesc(); auto dfg = ProgramDescToDFG(desc); LOG(INFO) << "spliter\n" << dfg.DotString(); - SubGraphSplitter::NodeInsideSubgraphTeller teller = [](const Node* node) { - if (node->type() != Node::Type::kFunction) return false; - const auto* func = static_cast(node); - if (func->func_type() == "elementwise_add" || func->func_type() == "relu" || - func->func_type() == "conv2d" || func->func_type() == "mul" || - func->func_type() == "sigmoid" || func->func_type() == "softmax") { - LOG(INFO) << "sub-graph marked " << node->repr(); - return true; - } - return false; - }; ASSERT_GT(dfg.nodes.size(), 5UL); auto subgraphs = SubGraphSplitter(&dfg, teller)(); @@ -62,6 +63,28 @@ TEST_F(DFG_Tester, Split) { ASSERT_EQ(subgraphs.back().size(), 6UL); } +TEST_F(DFG_Tester, Fuse) { + auto desc = LoadProgramDesc(); + auto dfg = ProgramDescToDFG(desc); + + size_t count0 = dfg.nodes.size(); + + SubGraphFuse fuse(&dfg, teller); + fuse(); + + int count1 = 0; + for (auto& node : dfg.nodes.nodes()) { + if (node->deleted()) { + LOG(INFO) << "deleted " << node->repr(); + } + count1 += node->deleted(); + } + + // At least one nodes should be deleted. + ASSERT_EQ(dfg.nodes.size(), count0 + 1); // added a new FunctionBlock + ASSERT_EQ(6, count1); +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc new file mode 100644 index 0000000000..f736e385c1 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "paddle/fluid/inference/analysis/analyzer.h" +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/node_attr_flags.h" +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" + +namespace paddle { +namespace inference { +namespace analysis { + +void TensorRTSubgraphNodeMarkPass::Run(DataFlowGraph *graph) { + for (auto &node : graph->nodes.nodes()) { + node->attr(ATTR_supported_by_tensorrt).Bool() = teller_(node.get()); + } +} + +class DfgDebuggerPass : public DFG_GraphvizDrawPass { + public: + explicit DfgDebuggerPass(const DFG_GraphvizDrawPass::Config &config) + : DFG_GraphvizDrawPass(config) {} + + std::string repr() const override { + return "tensorrt-subgraph-node-mark-debugger"; + } + + bool Finalize() override { return true; } + + protected: + std::string Draw(DataFlowGraph *graph) override { + Dot dot; + // Add nodes + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if (config_.display_deleted_node || !node.deleted()) { + auto dot_attr = node.dot_attrs(); + if (node.attr(ATTR_supported_by_tensorrt).Bool()) { + dot_attr.assign( + {Dot::Attr{"color", "green"}, Dot::Attr{"style", "filled"}}); + } + dot.AddNode(node.repr(), dot_attr); + } + } + // Add edges + for (size_t i = 0; i < graph->nodes.size(); i++) { + const Node &node = graph->nodes.Get(i); + if (!config_.display_deleted_node && node.deleted()) continue; + for (auto &in : node.inlinks) { + if (!config_.display_deleted_node && in->deleted()) continue; + dot.AddEdge(in->repr(), node.repr(), {}); + } + } + return dot.Build(); + } +}; + +Pass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const { + DFG_GraphvizDrawPass::Config config( + FLAGS_inference_analysis_graphviz_log_root, "tensorrt_marked_node"); + return new DfgDebuggerPass(config); +} +bool TensorRTSubgraphNodeMarkPass::Finalize() { return true; } + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h new file mode 100644 index 0000000000..c558a6ebbd --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h @@ -0,0 +1,60 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * This file defines TensorRTSubgraphNodeMarkPass which helps to mark the ops + * that supported by TensorRT engine. + */ + +#pragma once + +#include +#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Mark the operators that TensorRT engine supports. + */ +class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass { + public: + using teller_t = SubGraphSplitter::NodeInsideSubgraphTeller; + + explicit TensorRTSubgraphNodeMarkPass(const teller_t& teller) + : teller_(teller) {} + + bool Initialize(Argument* argument) override { return true; } + + // This class get a sub-graph as input and determine whether to transform this + // sub-graph into TensorRT. + void Run(DataFlowGraph* graph) override; + + std::string repr() const override { return "tensorrt-sub-subgraph-mark"; } + std::string description() const override { + return "tensorrt sub-graph mark pass"; + } + + Pass* CreateGraphvizDebugerPass() const override; + bool Finalize() override; + + private: + teller_t teller_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc new file mode 100644 index 0000000000..a6c15e848b --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass_tester.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h" + +#include +#include "paddle/fluid/inference/analysis/node_attr_flags.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TEST_F(DFG_Tester, tensorrt_subgraph_node_mark_pass) { + // init + FluidToDataFlowGraphPass pass; + ASSERT_TRUE(pass.Initialize(&argument)); + argument.main_dfg.reset(new DataFlowGraph); + pass.Run(argument.main_dfg.get()); + + TensorRTSubgraphNodeMarkPass::teller_t teller = [](const Node* node) { + return node->IsFunction() && + static_cast(node)->func_type() == "mul"; + }; + TensorRTSubgraphNodeMarkPass pass1(teller); + ASSERT_TRUE(pass1.Initialize(&argument)); + pass1.Run(argument.main_dfg.get()); + + int counter{0}; + for (auto& node : argument.main_dfg->nodes.nodes()) { + counter += node->attr(ATTR_supported_by_tensorrt).Bool(); + } + + LOG(INFO) << counter << " nodes marked"; +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc new file mode 100644 index 0000000000..9993de2280 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" + +namespace paddle { +namespace inference { +namespace analysis { + +TensorRTSubGraphPass::TensorRTSubGraphPass( + const TensorRTSubGraphPass::NodeInsideSubgraphTeller &teller) + : node_inside_subgraph_teller_(teller) {} + +void TensorRTSubGraphPass::Run(DataFlowGraph *graph) { + SubGraphFuse(graph, node_inside_subgraph_teller_)(); +} + +} // namespace analysis +} // namespace inference + +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h new file mode 100644 index 0000000000..c6741a9209 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/inference/analysis/node.h" +#include "paddle/fluid/inference/analysis/pass.h" +#include "paddle/fluid/inference/analysis/subgraph_splitter.h" + +namespace paddle { +namespace inference { +namespace analysis { + +/* + * Parse the graph and replace TensorRT supported nodes with SubGraphNode + */ +class TensorRTSubGraphPass : public DataFlowGraphPass { + public: + // Tell whether to transform a sub-graph into TensorRT. + using NodeInsideSubgraphTeller = SubGraphFuse::NodeInsideSubgraphTeller; + + explicit TensorRTSubGraphPass(const NodeInsideSubgraphTeller& teller); + + bool Initialize(Argument* argument) override { return true; } + + // This class get a sub-graph as input and determine whether to transform this + // sub-graph into TensorRT. + void Run(DataFlowGraph* graph) override; + + bool Finalize() override { return true; } + + std::string repr() const override { return "tensorrt-sub-graph"; } + std::string description() const override { return "tensorrt sub graph pass"; } + + private: + NodeInsideSubgraphTeller node_inside_subgraph_teller_; +}; + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc new file mode 100644 index 0000000000..1d749d3fa3 --- /dev/null +++ b/paddle/fluid/inference/analysis/tensorrt_subgraph_pass_tester.cc @@ -0,0 +1,70 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h" + +#include +#include +#include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h" +#include "paddle/fluid/inference/analysis/ut_helper.h" + +namespace paddle { +namespace inference { +namespace analysis { + +DEFINE_string(dot_dir, "./", ""); + +TEST_F(DFG_Tester, tensorrt_single_pass) { + std::unordered_set teller_set( + {"elementwise_add", "mul", "sigmoid"}); + SubGraphSplitter::NodeInsideSubgraphTeller teller = [&](const Node* node) { + if (node->type() != Node::Type::kFunction) return false; + const auto* func = static_cast(node); + if (teller_set.count(func->func_type())) return true; + return false; + }; + + LOG(INFO) << "init"; + DFG_GraphvizDrawPass::Config config{FLAGS_dot_dir, "origin"}; + DFG_GraphvizDrawPass::Config config1{FLAGS_dot_dir, "fusion"}; + + DFG_GraphvizDrawPass dfg_pass(config); + DFG_GraphvizDrawPass dfg_pass1(config1); + FluidToDataFlowGraphPass pass0; + TensorRTSubGraphPass trt_pass(std::move(teller)); + + LOG(INFO) << "Initialize"; + dfg_pass.Initialize(&argument); + dfg_pass1.Initialize(&argument); + pass0.Initialize(&argument); + trt_pass.Initialize(&argument); + + LOG(INFO) << "Run"; + argument.main_dfg.reset(new DataFlowGraph); + pass0.Run(argument.main_dfg.get()); + dfg_pass.Run(argument.main_dfg.get()); + trt_pass.Run(argument.main_dfg.get()); + dfg_pass1.Run(argument.main_dfg.get()); + + // Check the TRT op's block desc + for (auto& node : argument.main_dfg->nodes.nodes()) { + if (node->IsFunctionBlock()) { + LOG(INFO) << "get function block"; + } + } +} + +} // namespace analysis +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h index 722fa99a48..ce1191a567 100644 --- a/paddle/fluid/inference/analysis/ut_helper.h +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -15,33 +15,46 @@ limitations under the License. */ #pragma once #include #include +#include #include #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/inference/analysis/data_flow_graph.h" #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h" #include "paddle/fluid/inference/analysis/ut_helper.h" -#include "paddle/fluid/inference/io.h" namespace paddle { namespace inference { + +// Read ProgramDesc from a __model__ file, defined in io.cc +extern void ReadBinaryFile(const std::string& filename, std::string* contents); + namespace analysis { DEFINE_string(inference_model_dir, "", "inference test model dir"); static framework::proto::ProgramDesc LoadProgramDesc( const std::string& model_dir = FLAGS_inference_model_dir) { - paddle::platform::CPUPlace place; - paddle::framework::Executor executor(place); - paddle::framework::Scope scope; - auto program = Load(&executor, &scope, model_dir); - return *program->Proto(); + std::string msg; + std::string net_file = FLAGS_inference_model_dir + "/__model__"; + std::ifstream fin(net_file, std::ios::in | std::ios::binary); + PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s", net_file); + fin.seekg(0, std::ios::end); + msg.resize(fin.tellg()); + fin.seekg(0, std::ios::beg); + fin.read(&(msg.at(0)), msg.size()); + fin.close(); + framework::proto::ProgramDesc program_desc; + program_desc.ParseFromString(msg); + return program_desc; } static DataFlowGraph ProgramDescToDFG( const framework::proto::ProgramDesc& desc) { DataFlowGraph graph; FluidToDataFlowGraphPass pass; - pass.Initialize(desc); + Argument argument; + argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc)); + pass.Initialize(&argument); pass.Run(&graph); pass.Finalize(); return graph; @@ -49,9 +62,12 @@ static DataFlowGraph ProgramDescToDFG( class DFG_Tester : public ::testing::Test { protected: - void SetUp() override { desc = LoadProgramDesc(FLAGS_inference_model_dir); } + void SetUp() override { + auto desc = LoadProgramDesc(FLAGS_inference_model_dir); + argument.origin_program_desc.reset(new framework::proto::ProgramDesc(desc)); + } - framework::proto::ProgramDesc desc; + Argument argument; }; } // namespace analysis diff --git a/paddle/fluid/inference/io.cc b/paddle/fluid/inference/io.cc index 6b03ac7119..181868977d 100644 --- a/paddle/fluid/inference/io.cc +++ b/paddle/fluid/inference/io.cc @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/pybind/pybind.h" DEFINE_string(devices, "", "The devices to be used which is joined by comma."); @@ -33,7 +33,7 @@ namespace inference { void Init(const std::vector argv) { framework::InitGflags(argv); - operators::math::SetNumThreads(FLAGS_math_num_threads); + platform::SetNumThreads(FLAGS_math_num_threads); // init devices std::vector devices; std::string token; diff --git a/paddle/fluid/inference/io.h b/paddle/fluid/inference/io.h index caf599b1a6..01b50b3670 100644 --- a/paddle/fluid/inference/io.h +++ b/paddle/fluid/inference/io.h @@ -18,9 +18,9 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index c7a5a49dd0..6697952051 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -64,7 +64,8 @@ class OpConverter { (*it)(op, scope, test_mode); } - // convert fluid block to tensorrt network + // Convert a fluid block to tensorrt network, NOTE it just convert operators, + // the INetwork's inputs and outputs should specified in some other modules. void ConvertBlock(const framework::proto::BlockDesc& block, const std::unordered_set& parameters, const framework::Scope& scope, TensorRTEngine* engine) { diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index b60f00de9f..b06a9bbc67 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -51,11 +51,12 @@ class TensorRTEngine : public EngineBase { nvinfer1::Weights w_; }; - TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream, + TensorRTEngine(int max_batch, int max_workspace, + cudaStream_t* stream = nullptr, nvinfer1::ILogger& logger = NaiveLogger::Global()) : max_batch_(max_batch), max_workspace_(max_workspace), - stream_(stream), + stream_(stream ? stream : &default_stream_), logger_(logger) {} virtual ~TensorRTEngine(); @@ -121,6 +122,8 @@ class TensorRTEngine : public EngineBase { // the max memory size the engine uses int max_workspace_; cudaStream_t* stream_; + // If stream_ is not set from outside, hold its own stream. + cudaStream_t default_stream_; nvinfer1::ILogger& logger_; std::vector buffers_; @@ -165,20 +168,31 @@ class TensorRTEngine : public EngineBase { */ class TRT_EngineManager { public: - TensorRTEngine* Create(int max_batch, int max_workspace, - cudaStream_t* stream) { - engines_.emplace_back(new TensorRTEngine(max_batch, max_workspace, stream)); - return engines_.back().get(); + bool HasEngine(const std::string& name) const { + return engines_.count(name) != 0; + } + + // Get an engine called `name`. + TensorRTEngine* Get(const std::string& name) const { + return engines_.at(name).get(); + } + + // Create or get an engine called `name` + TensorRTEngine* Create(int max_batch, int max_workspace, cudaStream_t* stream, + const std::string& name) { + auto* p = new TensorRTEngine(max_batch, max_workspace, stream); + engines_[name].reset(p); + return p; } void DeleteALl() { - for (auto& ptr : engines_) { - ptr.reset(nullptr); + for (auto& item : engines_) { + item.second.reset(nullptr); } } private: - std::vector> engines_; + std::unordered_map> engines_; }; } // namespace tensorrt diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index 9dcd79c3bb..5cc1db12bb 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -19,8 +19,8 @@ limitations under the License. */ #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/inference/tests/test_helper.h" +#include "paddle/fluid/platform/cpu_helper.h" #ifdef PADDLE_WITH_MKLML -#include #include #endif @@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data."); DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_int32(num_threads, 1, "Number of threads should be used"); +DECLARE_bool(use_mkldnn); inline double GetCurrentMs() { struct timeval time; @@ -103,9 +104,9 @@ void ThreadRunInfer( const int tid, paddle::framework::Scope* scope, const std::vector>& jobs) { // maybe framework:ProgramDesc is not thread-safe + paddle::platform::CPUPlace place; + paddle::framework::Executor executor(place); auto& sub_scope = scope->NewScope(); - auto place = paddle::platform::CPUPlace(); - auto executor = paddle::framework::Executor(place); auto inference_program = paddle::inference::Load(&executor, scope, FLAGS_model_path); @@ -163,7 +164,7 @@ TEST(inference, nlp) { // only use 1 thread number per std::thread omp_set_dynamic(0); omp_set_num_threads(1); - mkl_set_num_threads(1); + paddle::platform::SetNumThreads(1); #endif double start_ms = 0, stop_ms = 0; @@ -182,8 +183,8 @@ TEST(inference, nlp) { stop_ms = GetCurrentMs(); } else { // 1. Define place, executor, scope - auto place = paddle::platform::CPUPlace(); - auto executor = paddle::framework::Executor(place); + paddle::platform::CPUPlace place; + paddle::framework::Executor executor(place); // 2. Initialize the inference_program and load parameters std::unique_ptr inference_program; diff --git a/paddle/fluid/memory/detail/buddy_allocator.cc b/paddle/fluid/memory/detail/buddy_allocator.cc index 4194ba1979..01a8501dd4 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.cc +++ b/paddle/fluid/memory/detail/buddy_allocator.cc @@ -19,8 +19,9 @@ namespace paddle { namespace memory { namespace detail { -BuddyAllocator::BuddyAllocator(SystemAllocator* system_allocator, - size_t min_chunk_size, size_t max_chunk_size) +BuddyAllocator::BuddyAllocator( + std::unique_ptr system_allocator, size_t min_chunk_size, + size_t max_chunk_size) : min_chunk_size_(min_chunk_size), max_chunk_size_(max_chunk_size), cache_(system_allocator->UseGpu()), diff --git a/paddle/fluid/memory/detail/buddy_allocator.h b/paddle/fluid/memory/detail/buddy_allocator.h index 2f39d774d6..f0c83efc23 100644 --- a/paddle/fluid/memory/detail/buddy_allocator.h +++ b/paddle/fluid/memory/detail/buddy_allocator.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +#include #include // NOLINT #include #include @@ -32,8 +33,8 @@ namespace detail { class BuddyAllocator { public: - BuddyAllocator(SystemAllocator* system_allocator, size_t min_chunk_size, - size_t max_chunk_size); + BuddyAllocator(std::unique_ptr system_allocator, + size_t min_chunk_size, size_t max_chunk_size); ~BuddyAllocator(); @@ -103,7 +104,7 @@ class BuddyAllocator { private: /*! Allocate CPU/GPU memory from system */ - SystemAllocator* system_allocator_; + std::unique_ptr system_allocator_; std::mutex mutex_; }; diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc index d539052916..9b1ab1e228 100644 --- a/paddle/fluid/memory/detail/system_allocator.cc +++ b/paddle/fluid/memory/detail/system_allocator.cc @@ -43,14 +43,16 @@ void* CPUAllocator::Alloc(size_t* index, size_t size) { *index = 0; // unlock memory - void* p; + void* p = nullptr; #ifdef PADDLE_WITH_MKLDNN // refer to https://github.com/01org/mkl-dnn/blob/master/include/mkldnn.hpp // memory alignment - PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0); + PADDLE_ENFORCE_EQ(posix_memalign(&p, 4096ul, size), 0, "Alloc %ld error!", + size); #else - PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0); + PADDLE_ENFORCE_EQ(posix_memalign(&p, 32ul, size), 0, "Alloc %ld error!", + size); #endif PADDLE_ENFORCE(p, "Fail to allocate CPU memory: size = %d .", size); diff --git a/paddle/fluid/memory/malloc.cc b/paddle/fluid/memory/malloc.cc index 0c74f62de5..7c800b3c16 100644 --- a/paddle/fluid/memory/malloc.cc +++ b/paddle/fluid/memory/malloc.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include + #include "paddle/fluid/memory/malloc.h" #include "glog/logging.h" @@ -20,6 +22,12 @@ limitations under the License. */ #include "paddle/fluid/memory/detail/system_allocator.h" #include "paddle/fluid/platform/gpu_info.h" +DEFINE_bool(init_allocated_mem, false, + "It is a mistake that the values of the memory allocated by " + "BuddyAllocator are always zeroed in some op's implementation. " + "To find this error in time, we use init_allocated_mem to indicate " + "that initializing the allocated memory with a small value " + "during unit testing."); DECLARE_double(fraction_of_gpu_memory_to_use); namespace paddle { @@ -28,12 +36,15 @@ namespace memory { using BuddyAllocator = detail::BuddyAllocator; BuddyAllocator* GetCPUBuddyAllocator() { + static std::once_flag init_flag; static detail::BuddyAllocator* a = nullptr; - if (a == nullptr) { - a = new detail::BuddyAllocator(new detail::CPUAllocator, - platform::CpuMinChunkSize(), - platform::CpuMaxChunkSize()); - } + + std::call_once(init_flag, []() { + a = new detail::BuddyAllocator( + std::unique_ptr(new detail::CPUAllocator), + platform::CpuMinChunkSize(), platform::CpuMaxChunkSize()); + }); + return a; } @@ -41,6 +52,9 @@ template <> void* Alloc(platform::CPUPlace place, size_t size) { VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); void* p = GetCPUBuddyAllocator()->Alloc(size); + if (FLAGS_init_allocated_mem) { + memset(p, 0xEF, size); + } VLOG(10) << " pointer=" << p; return p; } @@ -59,27 +73,33 @@ size_t Used(platform::CPUPlace place) { #ifdef PADDLE_WITH_CUDA BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { - static BuddyAllocator** as = NULL; - if (as == NULL) { + static std::once_flag init_flag; + static detail::BuddyAllocator** a_arr = nullptr; + + std::call_once(init_flag, [gpu_id]() { int gpu_num = platform::GetCUDADeviceCount(); - as = new BuddyAllocator*[gpu_num]; - for (int gpu = 0; gpu < gpu_num; gpu++) { - as[gpu] = nullptr; + PADDLE_ENFORCE(gpu_id < gpu_num, "gpu_id:%d should < gpu_num:%d", gpu_id, + gpu_num); + + a_arr = new BuddyAllocator*[gpu_num]; + for (int i = 0; i < gpu_num; i++) { + a_arr[i] = nullptr; + platform::SetDeviceId(i); + a_arr[i] = new BuddyAllocator( + std::unique_ptr(new detail::GPUAllocator(i)), + platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); + + VLOG(10) << "\n\nNOTE: each GPU device use " + << FLAGS_fraction_of_gpu_memory_to_use * 100 + << "% of GPU memory.\n" + << "You can set GFlags environment variable '" + << "FLAGS_fraction_of_gpu_memory_to_use" + << "' to change the fraction of GPU usage.\n\n"; } - } + }); + platform::SetDeviceId(gpu_id); - if (!as[gpu_id]) { - as[gpu_id] = new BuddyAllocator(new detail::GPUAllocator(gpu_id), - platform::GpuMinChunkSize(), - platform::GpuMaxChunkSize()); - VLOG(10) << "\n\nNOTE: each GPU device use " - << FLAGS_fraction_of_gpu_memory_to_use * 100 - << "% of GPU memory.\n" - << "You can set GFlags environment variable '" - << "FLAGS_fraction_of_gpu_memory_to_use" - << "' to change the fraction of GPU usage.\n\n"; - } - return as[gpu_id]; + return a_arr[gpu_id]; } template <> @@ -104,6 +124,9 @@ void* Alloc(platform::CUDAPlace place, size_t size) { LOG(WARNING) << "GPU memory used: " << Used(place); platform::SetDeviceId(cur_dev); } + if (FLAGS_init_allocated_mem) { + cudaMemset(ptr, 0xEF, size); + } return ptr; } @@ -113,12 +136,16 @@ void Free(platform::CUDAPlace place, void* p) { } BuddyAllocator* GetCUDAPinnedBuddyAllocator() { - static BuddyAllocator* ba = NULL; - if (ba == NULL) { - ba = new BuddyAllocator(new detail::CUDAPinnedAllocator, + static std::once_flag init_flag; + static BuddyAllocator* ba = nullptr; + + std::call_once(init_flag, []() { + ba = new BuddyAllocator(std::unique_ptr( + new detail::CUDAPinnedAllocator), platform::CUDAPinnedMinChunkSize(), platform::CUDAPinnedMaxChunkSize()); - } + }); + return ba; } @@ -137,6 +164,9 @@ void* Alloc(platform::CUDAPinnedPlace place, LOG(WARNING) << "cudaMallocHost Cannot allocate " << size << " bytes in CUDAPinnedPlace"; } + if (FLAGS_init_allocated_mem) { + memset(ptr, 0xEF, size); + } return ptr; } diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt index e4bb04295b..b35a32a553 100644 --- a/paddle/fluid/operators/CMakeLists.txt +++ b/paddle/fluid/operators/CMakeLists.txt @@ -184,35 +184,41 @@ else() set(DEPS_OPS ${DEPS_OPS} nccl_op) endif() -add_subdirectory(detail) +set(DISTRIBUTE_DEPS "") if(WITH_DISTRIBUTE) - + add_subdirectory(distributed) + set(DISTRIBUTE_DEPS "") if(WITH_GRPC) set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf) else() set(DISTRIBUTE_DEPS sendrecvop_brpc brpc leveldb snappystream snappy protobuf ssl crypto zlib) + if(WITH_BRPC_RDMA) + find_library(IBVERBS_LIBRARY NAMES ibverbs) + ADD_LIBRARY(ibverbs SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET ibverbs PROPERTY IMPORTED_LOCATION ${IBVERBS_LIBRARY}) + + + find_library(RDMACM_LIBRARY NAMES rdmacm) + ADD_LIBRARY(rdmacm SHARED IMPORTED GLOBAL) + SET_PROPERTY(TARGET rdmacm PROPERTY IMPORTED_LOCATION ${RDMACM_LIBRARY}) + + set(DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} ibverbs rdmacm) + endif() endif() set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - op_library(prefetch_op DEPS ${DISTRIBUTE_DEPS}) - set_source_files_properties(prefetch_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - op_library(recv_op DEPS ${DISTRIBUTE_DEPS}) - set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - op_library(listen_and_serv_op DEPS ${DISTRIBUTE_DEPS}) - set_source_files_properties(listen_and_serv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - op_library(send_op DEPS ${DISTRIBUTE_DEPS}) - set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS}) - op_library(fetch_barrier_op DEPS ${DISTRIBUTE_DEPS}) - set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - set_source_files_properties(fetch_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + foreach(dist_op "prefetch_op" "checkpoint_notify_op" "listen_and_serv_op" "send_op" "recv_op" "send_barrier_op" "fetch_barrier_op") + op_library(${dist_op} DEPS ${DISTRIBUTE_DEPS}) + set_source_files_properties(${dist_op}.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) + endforeach() + #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op # listen_and_serv_op sum_op executor SERIAL) if(WITH_GPU) set_source_files_properties(test_send_nccl_id.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) - cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op executor SERIAL) + cc_test(test_send_nccl_id SRCS test_send_nccl_id.cc DEPS listen_and_serv_op ${DISTRIBUTE_DEPS} executor SERIAL) if(WITH_GRPC) op_library(gen_nccl_id_op DEPS nccl_common sendrecvop_grpc) else() @@ -223,7 +229,7 @@ if(WITH_DISTRIBUTE) set(DEPS_OPS ${DEPS_OPS} gen_nccl_id_op) endif() else() - set(DEPS_OPS ${DEPS_OPS} prefetch_op recv_op listen_and_serv_op send_op send_barrier_op fetch_barrier_op gen_nccl_id_op) + set(DEPS_OPS ${DEPS_OPS} checkpoint_notify_op prefetch_op recv_op listen_and_serv_op send_op send_barrier_op fetch_barrier_op gen_nccl_id_op) endif() op_library(cross_entropy_op DEPS cross_entropy) @@ -233,7 +239,8 @@ op_library(sequence_softmax_op DEPS softmax) if (WITH_GPU AND TENSORRT_FOUND) op_library(tensorrt_engine_op DEPS tensorrt_engine) nv_test(test_tensorrt_engine_op SRCS tensorrt_engine_op_test.cc - DEPS tensorrt_engine_op tensorrt_engine tensorrt_converter) + DEPS tensorrt_engine_op tensorrt_engine tensorrt_converter + analysis) else() set(DEPS_OPS ${DEPS_OPS} tensorrt_engine_op) endif() @@ -304,6 +311,7 @@ foreach(src ${DETECTION_LIBRARY}) endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") +set(GLOB_DISTRIBUTE_DEPS ${DISTRIBUTE_DEPS} CACHE INTERNAL "distributed dependency") cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/fluid/operators/activation_mkldnn_op.cc b/paddle/fluid/operators/activation_mkldnn_op.cc index 46ed99bcf2..137bca5e2b 100644 --- a/paddle/fluid/operators/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/activation_mkldnn_op.cc @@ -12,16 +12,20 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include "mkldnn.hpp" #include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/mkldnn_activation_op.h" #include "paddle/fluid/platform/mkldnn_helper.h" namespace paddle { namespace operators { -using paddle::framework::Tensor; -using paddle::platform::MKLDNNDeviceContext; +using framework::DataLayout; +using framework::Tensor; +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::stream; +using platform::GetMKLDNNFormat; +using platform::MKLDNNDeviceContext; +using platform::to_void_cast; namespace { std::string gethash(const mkldnn::memory::dims &operand_dims, @@ -35,188 +39,260 @@ std::string gethash(const mkldnn::memory::dims &operand_dims, }; return dim2str(operand_dims) + std::to_string(algorithm); } +} // namespace + +template +class MKLDNNActivationKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *x = ctx.Input("X"); + PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN && + x->format() != memory::format::format_undef, + "Wrong layout/format set for Input x tensor"); + + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = ctx.Attr(attr.first); + } + functor(ctx); + } +}; -template -void eltwise_forward(const ExecContext &ctx, mkldnn::algorithm algorithm, - const T alpha = 0, const T beta = 0) { +template +class MKLDNNActivationGradKernel + : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext &ctx) const override { + const auto *diff_y = ctx.Input(framework::GradVarName("Out")); + PADDLE_ENFORCE(diff_y->layout() == DataLayout::kMKLDNN && + diff_y->format() != memory::format::format_undef, + "Wrong layout/format set for Input OutGrad tensor"); + + Functor functor; + + auto attrs = functor.GetAttrs(); + for (auto &attr : attrs) { + *attr.second = ctx.Attr(attr.first); + } + functor(ctx); + } +}; + +template +void eltwise_forward(const framework::ExecutionContext &ctx, + mkldnn::algorithm algorithm, const T alpha = 0, + const T beta = 0) { PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); - auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - // get buffers - const auto *src = ctx.template Input("X"); - const auto *src_data = src->template data(); + const auto *x = ctx.Input("X"); + auto *y = ctx.Output("Out"); - auto *dst = ctx.template Output("Out"); - T *dst_data = dst->template mutable_data(ctx.GetPlace()); + const T *x_data = x->data(); + T *y_data = y->mutable_data(ctx.GetPlace()); - // get memory dim - PADDLE_ENFORCE(src->dims().size() == 2 || src->dims().size() == 4, + PADDLE_ENFORCE(x->dims().size() == 2 || x->dims().size() == 4, "Input dim must be with 2 or 4"); - std::vector src_tz = framework::vectorize2int(src->dims()); + + std::vector src_tz = framework::vectorize2int(x->dims()); + + auto src_format = + src_tz.size() == 2 ? mkldnn::memory::format::nc : x->format(); const std::string key = gethash(src_tz, algorithm); const std::string key_src_data = key + ctx.op().Output("Out") + "@eltwise_fwd_src_data"; - const std::string key_src_mem = key + "@eltwise_fwd_src_mem"; - const std::string key_dst_mem = key + "@eltwise_fwd_dst_mem"; - const std::string key_fwd = key + "@eltwise_fwd"; + const std::string key_src_layout = + key + ctx.op().Output("Out") + "@eltwise_fwd_src_layout"; + const std::string key_with_layout = key + std::to_string(src_format); + const std::string key_src_mem = key_with_layout + "@eltwise_fwd_src_mem"; + const std::string key_dst_mem = key_with_layout + "@eltwise_fwd_dst_mem"; + const std::string key_fwd = key_with_layout + "@eltwise_fwd"; + const std::string key_fwd_pd = key_with_layout + "@eltwise_fwd_pd"; + + // save input data and layout to be referred in backward path + auto p_src_data = std::make_shared(x_data); + dev_ctx.SetBlob(key_src_data, p_src_data); + auto p_src_layout = std::make_shared(src_format); + dev_ctx.SetBlob(key_src_layout, p_src_layout); auto p_fwd = std::static_pointer_cast( dev_ctx.GetBlob(key_fwd)); - // save input data to be referred in backward path - auto p_src_data = std::make_shared(src_data); - dev_ctx.SetBlob(key_src_data, p_src_data); + std::shared_ptr dst_memory; if (p_fwd == nullptr) { - // create memory description - auto data_md = src_tz.size() == 2 - ? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nc) - : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - // create memory primitives - auto p_src_mem = std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(src_data))); - dev_ctx.SetBlob(key_src_mem, p_src_mem); - - auto p_dst_mem = std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(dst_data))); - dev_ctx.SetBlob(key_dst_mem, p_dst_mem); - - auto fwd_desc = mkldnn::eltwise_forward::desc( - mkldnn::prop_kind::forward_training, algorithm, data_md, alpha, beta); - auto p_fwd_pd = std::make_shared( - fwd_desc, mkldnn_engine); - const std::string key_fwd_pd = key + "eltwise_fwd_pd"; - dev_ctx.SetBlob(key_fwd_pd, p_fwd_pd); - p_fwd = std::make_shared( - *p_fwd_pd, *(p_src_mem.get()), *(p_dst_mem.get())); + // create mkldnn memory for input X + auto src_md = platform::MKLDNNMemDesc( + src_tz, platform::MKLDNNGetDataType(), src_format); + auto src_memory = std::shared_ptr( + new memory({src_md, mkldnn_engine}, to_void_cast(x_data))); + // save src_memory to be referred in backward path + dev_ctx.SetBlob(key_src_mem, src_memory); + + // create primitive descriptor for activation forward and save it + auto forward_desc = mkldnn::eltwise_forward::desc( + mkldnn::prop_kind::forward_training, algorithm, + src_memory->get_primitive_desc().desc(), alpha, beta); + auto forward_pd = std::make_shared( + forward_desc, mkldnn_engine); + + // save prim desc into global device context to be referred in backward path + dev_ctx.SetBlob(key_fwd_pd, forward_pd); + + // create mkldnn memory for output y + dst_memory = + std::make_shared(forward_pd->dst_primitive_desc(), y_data); + + dev_ctx.SetBlob(key_dst_mem, dst_memory); + + // create activation primitive + p_fwd = std::make_shared(*forward_pd, *src_memory, + *dst_memory); dev_ctx.SetBlob(key_fwd, p_fwd); } else { // primitives already exist - auto p_src_mem = + auto src_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); - PADDLE_ENFORCE(p_src_mem != nullptr, - "Fail to find eltwise p_src_mem in device context."); - auto p_dst_mem = + PADDLE_ENFORCE(src_memory != nullptr, + "Fail to find eltwise src_memory in device context."); + dst_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_dst_mem)); - PADDLE_ENFORCE(p_dst_mem != nullptr, - "Fail to find eltwise p_src_mem in device context."); + PADDLE_ENFORCE(dst_memory != nullptr, + "Fail to find eltwise dst_memory in device context."); - p_src_mem->set_data_handle(platform::to_void_reinterpret_cast(src_data)); - p_dst_mem->set_data_handle(dst_data); + src_memory->set_data_handle(platform::to_void_cast(x_data)); + dst_memory->set_data_handle(y_data); } // push primitive to stream and wait until it's executed - std::vector pipeline = {*(p_fwd.get())}; - mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + std::vector pipeline; + pipeline.push_back(*p_fwd); + stream(stream::kind::eager).submit(pipeline).wait(); + + y->set_layout(DataLayout::kMKLDNN); + y->set_format(GetMKLDNNFormat(*dst_memory)); } -template -void eltwise_grad(const ExecContext &ctx, mkldnn::algorithm algorithm, - const T alpha = 0, const T beta = 0) { +template +void eltwise_grad(const framework::ExecutionContext &ctx, + mkldnn::algorithm algorithm, const T alpha = 0, + const T beta = 0) { auto &dev_ctx = ctx.template device_context(); const auto &mkldnn_engine = dev_ctx.GetEngine(); - // get buffers - const auto *out = ctx.template Input("Out"); - - auto *dout = ctx.template Input(framework::GradVarName("Out")); - const auto *diff_dst = dout->template data(); + const auto *diff_y = ctx.Input(framework::GradVarName("Out")); + auto *diff_x = ctx.Output(framework::GradVarName("X")); - auto *dx = - ctx.template Output(framework::GradVarName("X")); - const T *diff_src = dx->template mutable_data(ctx.GetPlace()); + const T *diff_y_data = diff_y->data(); + T *diff_x_data = diff_x->mutable_data(ctx.GetPlace()); - // get memory dim - std::vector src_tz = framework::vectorize2int(out->dims()); + std::vector diff_dst_tz = framework::vectorize2int(diff_y->dims()); - const std::string key = gethash(src_tz, algorithm); - const std::string key_diff_src_mem = key + "@eltwise_diff_src_mem"; - const std::string key_diff_dst_mem = key + "@eltwise_diff_dst_mem"; - const std::string key_grad = key + "@eltwise_grad"; + auto diff_y_format = + diff_dst_tz.size() == 2 ? mkldnn::memory::format::nc : diff_y->format(); + const std::string key = gethash(diff_dst_tz, algorithm); const std::string key_src_data = key + ctx.op().Input("Out") + "@eltwise_fwd_src_data"; + const std::string key_src_layout = + key + ctx.op().Input("Out") + "@eltwise_fwd_src_layout"; + const auto p_src_layout = + std::static_pointer_cast(dev_ctx.GetBlob(key_src_layout)); + const std::string key_src_mem = + key + std::to_string(*p_src_layout) + "@eltwise_fwd_src_mem"; + const std::string key_fwd_pd = + key + std::to_string(*p_src_layout) + "@eltwise_fwd_pd"; + const std::string key_with_layouts = + key + std::to_string(*p_src_layout) + "-" + std::to_string(diff_y_format); + const std::string key_diff_src_mem = + key_with_layouts + "@eltwise_diff_src_mem"; + const std::string key_diff_dst_mem = + key_with_layouts + "@eltwise_diff_dst_mem"; + const std::string key_grad = key_with_layouts + "@eltwise_grad"; + const auto p_src_data = std::static_pointer_cast(dev_ctx.GetBlob(key_src_data)); - const std::string key_src_mem = key + "@eltwise_fwd_src_mem"; - auto p_src_mem = + auto src_memory = std::static_pointer_cast(dev_ctx.GetBlob(key_src_mem)); - p_src_mem->set_data_handle(*p_src_data.get()); + PADDLE_ENFORCE(src_memory != nullptr, + "Fail to find src_memory in device context"); + src_memory->set_data_handle(*p_src_data.get()); + + std::shared_ptr diff_src_memory; - auto p_grad = std::static_pointer_cast( + auto p_grad = std::static_pointer_cast( dev_ctx.GetBlob(key_grad)); if (p_grad == nullptr) { - // create memory description - auto data_md = src_tz.size() == 2 - ? platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nc) - : platform::MKLDNNMemDesc(src_tz, mkldnn::memory::f32, - mkldnn::memory::format::nchw); - - // create memory primitives - std::shared_ptr p_diff_src_mem = - std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(diff_src))); - dev_ctx.SetBlob(key_diff_src_mem, p_diff_src_mem); - std::shared_ptr p_diff_dst_mem = - std::make_shared(mkldnn::memory( - {data_md, mkldnn_engine}, platform::to_void_cast(diff_dst))); - dev_ctx.SetBlob(key_diff_dst_mem, p_diff_dst_mem); - - auto bwd_desc = mkldnn::eltwise_backward::desc(algorithm, data_md, data_md, - alpha, beta); - - const std::string key_fwd_pd = key + "eltwise_fwd_pd"; - auto *p_fwd_pd = static_cast( - dev_ctx.GetBlob(key_fwd_pd).get()); - - auto eltwise_bwd_prim_desc = mkldnn::eltwise_backward::primitive_desc( - bwd_desc, mkldnn_engine, *p_fwd_pd); - + // create mkldnn memory for input diff_y + auto diff_dst_md = platform::MKLDNNMemDesc( + diff_dst_tz, platform::MKLDNNGetDataType(), diff_y_format); + auto diff_dst_memory = std::shared_ptr( + new memory({diff_dst_md, mkldnn_engine}, to_void_cast(diff_y_data))); + dev_ctx.SetBlob(key_diff_dst_mem, diff_dst_memory); + + // retrieve eltwise primitive desc from device context + auto forward_pd = + std::static_pointer_cast( + dev_ctx.GetBlob(key_fwd_pd)); + PADDLE_ENFORCE(forward_pd != nullptr, + "Fail to find eltwise_fwd_pd in device context"); + + // ceate primitive descriptor for activation backward + auto backward_desc = mkldnn::eltwise_backward::desc( + algorithm, diff_dst_memory->get_primitive_desc().desc(), + src_memory->get_primitive_desc().desc(), alpha, beta); + auto backward_pd = mkldnn::eltwise_backward::primitive_desc( + backward_desc, mkldnn_engine, *forward_pd); + + // create mkldnn memory for output diff_src + diff_src_memory = std::make_shared( + backward_pd.diff_src_primitive_desc(), diff_x_data); + dev_ctx.SetBlob(key_diff_src_mem, diff_src_memory); + + // create activation backward primitive p_grad = std::make_shared( - eltwise_bwd_prim_desc, *static_cast(p_src_mem.get()), - *(static_cast(p_diff_dst_mem.get())), - *(static_cast(p_diff_src_mem.get()))); + backward_pd, *src_memory, *diff_dst_memory, *diff_src_memory); + dev_ctx.SetBlob(key_grad, p_grad); } else { // primitives already exist - auto p_diff_src_mem = std::static_pointer_cast( + diff_src_memory = std::static_pointer_cast( dev_ctx.GetBlob(key_diff_src_mem)); - auto p_diff_dst_mem = std::static_pointer_cast( + auto diff_dst_memory = std::static_pointer_cast( dev_ctx.GetBlob(key_diff_dst_mem)); - p_diff_src_mem->set_data_handle( - platform::to_void_reinterpret_cast(diff_src)); - p_diff_dst_mem->set_data_handle( - platform::to_void_reinterpret_cast(diff_dst)); + diff_src_memory->set_data_handle( + platform::to_void_reinterpret_cast(diff_x_data)); + diff_dst_memory->set_data_handle( + platform::to_void_reinterpret_cast(diff_y_data)); } // push primitive to stream and wait until it's executed - std::vector pipeline = {*(p_grad.get())}; - mkldnn::stream(mkldnn::stream::kind::eager).submit(pipeline).wait(); + std::vector pipeline; + pipeline.push_back(*p_grad); + stream(stream::kind::eager).submit(pipeline).wait(); + + diff_x->set_layout(DataLayout::kMKLDNN); + diff_x->set_format(GetMKLDNNFormat(*diff_src_memory)); } -} // anonymous namespace template struct MKLDNNActivationFunc : public BaseActivationFunctor { - template - void operator()(const ExecContext &ctx) const { + void operator()(const framework::ExecutionContext &ctx) const { eltwise_forward(ctx, algorithm); } }; template struct MKLDNNActivationGradFunc : public BaseActivationFunctor { - template - void operator()(const ExecContext &ctx) const { + void operator()(const framework::ExecutionContext &ctx) const { eltwise_grad(ctx, algorithm); } }; diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index af1d85047e..286b03d7b7 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -19,6 +19,8 @@ limitations under the License. */ namespace paddle { namespace operators { +using paddle::framework::Tensor; + #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ class OP_NAME##OpMaker \ : public ::paddle::framework::OpProtoAndCheckerMaker { \ @@ -29,7 +31,7 @@ namespace operators { AddAttr("use_mkldnn", \ "(bool, default false) Only used in mkldnn kernel") \ .SetDefault(false); \ - AddComment(OP_COMMENT); \ + AddComment(#OP_COMMENT); \ } \ } @@ -58,7 +60,6 @@ framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx, const framework::OperatorWithKernel& oper, const std::string& name) { framework::LibraryType library{framework::LibraryType::kPlain}; - framework::DataLayout layout = framework::DataLayout::kAnyLayout; #ifdef PADDLE_WITH_MKLDNN auto it = oper.Attrs().find("use_mkldnn"); @@ -82,6 +83,7 @@ class ActivationOp : public framework::OperatorWithKernel { ctx->ShareLoD("X", /*->*/ "Out"); } + protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return GetKernelType(ctx, *this, "X"); @@ -96,6 +98,7 @@ class ActivationOpGrad : public framework::OperatorWithKernel { ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("Out")); } + protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return GetKernelType(ctx, *this, "Out"); @@ -112,7 +115,7 @@ $$out = \frac{1}{1 + e^{-x}}$$ __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( Logsigmoid Activation Operator -$$out = \log \frac{1}{1 + e^{-x}}$$ +$$out = \\log \\frac{1}{1 + e^{-x}}$$ )DOC"; @@ -133,14 +136,14 @@ $out = \max(x, 0)$ __attribute__((unused)) constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. -$$out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; __attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. -$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ +$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; @@ -196,7 +199,7 @@ $out = [x]$ __attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( Reciprocal Activation Operator. -$$out = \frac{1}{x}$$ +$$out = \\frac{1}{x}$$ )DOC"; @@ -252,15 +255,14 @@ class SoftShrinkOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "Output of Softshrink operator"); AddAttr("lambda", "non-negative offset").SetDefault(0.5f); AddComment(R"DOC( -Softshrink Activation Operator. +:strong:`Softshrink Activation Operator` -$$ -out = \begin{cases} - x - \lambda, \text{if } x > \lambda \\ - x + \lambda, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ +.. math:: + out = \begin{cases} + x - \lambda, \text{if } x > \lambda \\ + x + \lambda, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} )DOC"); } @@ -271,18 +273,18 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "Input of HardShrink operator"); AddOutput("Out", "Output of HardShrink operator"); - AddAttr("threshold", "The value of threshold for HardShrink") + AddAttr("threshold", + "The value of threshold for HardShrink. [default: 0.5]") .SetDefault(0.5f); AddComment(R"DOC( -HardShrink Activation Operator. +:strong:`HardShrink activation operator` -$$ -out = \begin{cases} - x, \text{if } x > \lambda \\ - x, \text{if } x < -\lambda \\ - 0, \text{otherwise} - \end{cases} -$$ +.. math:: + out = \begin{cases} + x, \text{if } x > \lambda \\ + x, \text{if } x < -\lambda \\ + 0, \text{otherwise} + \end{cases} )DOC"); } @@ -383,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( STanh Activation Operator. -$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ +$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$ )DOC"); } @@ -394,18 +396,18 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "Input of ThresholdedRelu operator"); AddOutput("Out", "Output of ThresholdedRelu operator"); - AddAttr("threshold", "The threshold location of activation") + AddAttr("threshold", + "The threshold location of activation. [default 1.0].") .SetDefault(1.0f); AddComment(R"DOC( -ThresholdedRelu Activation Operator. +:strong:`ThresholdedRelu activation operator` -$$ -out = \begin{cases} - x, \text{if } x > threshold \\ - 0, \text{otherwise} - \end{cases} -$$ +.. math:: + out = \begin{cases} + x, \text{if } x > threshold \\ + 0, \text{otherwise} + \end{cases} )DOC"); } }; @@ -444,7 +446,7 @@ class SwishOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Swish Activation Operator. -$$out = \frac{x}{1 + e^{- \beta x}}$$ +$$out = \\frac{x}{1 + e^{- \beta x}}$$ )DOC"); } diff --git a/paddle/fluid/operators/adam_op.cc b/paddle/fluid/operators/adam_op.cc index 6ee73c3000..5d670fe3b9 100644 --- a/paddle/fluid/operators/adam_op.cc +++ b/paddle/fluid/operators/adam_op.cc @@ -56,9 +56,12 @@ class AdamOp : public framework::OperatorWithKernel { "Beta2 power accumulator should have 1 dimension"); auto param_dims = ctx->GetInputDim("Param"); - PADDLE_ENFORCE_EQ( - param_dims, ctx->GetInputDim("Grad"), - "Param and Grad input of AdamOp should have same dimension"); + if (ctx->GetInputsVarType("Grad")[0] == + framework::proto::VarType::LOD_TENSOR) { + PADDLE_ENFORCE_EQ( + param_dims, ctx->GetInputDim("Grad"), + "Param and Grad input of AdamOp should have same dimension"); + } PADDLE_ENFORCE_EQ( param_dims, ctx->GetInputDim("Moment1"), "Param and Moment1 input of AdamOp should have same dimension"); diff --git a/paddle/fluid/operators/adam_op.h b/paddle/fluid/operators/adam_op.h index f82ff47b52..a7a28b02b6 100644 --- a/paddle/fluid/operators/adam_op.h +++ b/paddle/fluid/operators/adam_op.h @@ -282,6 +282,10 @@ class AdamOpKernel : public framework::OpKernel { } else if (grad_var->IsType()) { auto& grad = Ref(ctx.Input("Grad"), "Must set Grad"); + if (grad.rows().size() == 0) { + VLOG(3) << "grad row size is 0!!"; + return; + } // merge duplicated rows if any. scatter::MergeAdd merge_func; auto grad_merge = diff --git a/paddle/fluid/operators/argsort_op.cc b/paddle/fluid/operators/argsort_op.cc new file mode 100644 index 0000000000..a2f5a25457 --- /dev/null +++ b/paddle/fluid/operators/argsort_op.cc @@ -0,0 +1,87 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/argsort_op.h" + +namespace paddle { +namespace operators { + +class ArgsortOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of ArgsortOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of ArgsortOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Indices"), + "Output(Indices) of ArgsortOp should not be null."); + + auto in_dims = ctx->GetInputDim("X"); + int axis = ctx->Attrs().Get("axis"); + + auto num_dims = in_dims.size(); + PADDLE_ENFORCE(axis < num_dims, + "Attr(axis) %d of ArgsortOp is out of bounds for Input(X)'s " + "rank %d.", + axis, num_dims); + PADDLE_ENFORCE(axis >= -num_dims, + "Attr(axis) %d of ArgsortOp must be not less than " + "-rank(Input(X)) (%d).", + axis, num_dims); + + ctx->SetOutputDim("Out", in_dims); + ctx->SetOutputDim("Indices", in_dims); + ctx->ShareLoD("X", "Out"); + ctx->ShareLoD("X", "Indices"); + } +}; + +class ArgsortOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("X", "(Tensor) The input of Argsort op."); + AddOutput("Out", + "(Tensor) The sorted tensor of Argsort op, with the same " + "shape as Input(X)."); + AddOutput("Indices", + "(Tensor) The indices of a tensor giving the sorted order, with " + "the same shape as Input(X)."); + AddComment(R"DOC( +Argsort operator + +Performs sorting on the input tensor along the given axis and outputs two +tensors, Output(Out) and Output(Indices). They reserve the same shape +with Input(X), and Output(Out) represents the sorted tensor while +Output(Indices) gives the sorted order along the given axis Attr(axis). + + )DOC"); + AddAttr("axis", + "(int, default -1) The axis along which to sort the tensor. " + "When axis < 0, the actual axis will be the |axis|'th " + "counting backwards. Default -1, the last dimension.") + .SetDefault(-1); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(argsort, ops::ArgsortOp, ops::ArgsortOpMaker, + paddle::framework::EmptyGradOpMaker); +REGISTER_OP_CPU_KERNEL(argsort, + ops::ArgsortKernel, + ops::ArgsortKernel); diff --git a/paddle/fluid/operators/argsort_op.cu b/paddle/fluid/operators/argsort_op.cu new file mode 100644 index 0000000000..7d5199aae7 --- /dev/null +++ b/paddle/fluid/operators/argsort_op.cu @@ -0,0 +1,151 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/argsort_op.h" +#include "paddle/fluid/platform/assert.h" +#include "paddle/fluid/platform/cuda_device_function.h" +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; +using platform::PADDLE_CUDA_NUM_THREADS; + +const int kMaxRank = 9; // The max rank of a tensor allowed in Fluid + +__global__ void ComputeTargetIdx(const int64_t* in_dims, int dims_size, + int axis, int64_t n, int64_t* trg_idx, + int64_t* med_ids) { + int64_t index = threadIdx.x + blockDim.x * blockIdx.x; + if (index < n) { + int64_t shape_out_axis[kMaxRank - 1] = {0}; + int64_t dims_out_axis[kMaxRank - 1] = {0}; + int64_t tmp = index; + int64_t pos_in_axis = 0; + int64_t i = dims_size - 2; + int64_t dim_axis = 0; + for (int64_t j = dims_size - 1; j >= 0; --j) { + int64_t dim = in_dims[j]; + if (j != axis) { + shape_out_axis[i] = tmp % dim; + dims_out_axis[i] = dim; + i--; + } else { + dim_axis = dim; + pos_in_axis = tmp % dim_axis; + } + tmp /= dim; + } + int64_t group = (dims_size > 1) ? shape_out_axis[0] : 0; + for (int64_t j = 0; j < dims_size - 2; ++j) { + group = group * dims_out_axis[j + 1] + shape_out_axis[j + 1]; + } + + int64_t traget_idx = group * dim_axis + pos_in_axis; + trg_idx[index] = traget_idx; + med_ids[traget_idx] = pos_in_axis; + } +} + +template +__global__ void PermuteInData(const T* in, const int64_t* trg_idx, int64_t n, + T* med_out) { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if (index < n) { + med_out[trg_idx[index]] = in[index]; + } +} + +template +__global__ void Sort(int64_t axis_dim, int64_t groups, T* med_out, + int64_t* med_ids) { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if (index < groups) { + thrust::sort_by_key(thrust::device, med_out + index * axis_dim, + med_out + axis_dim * (1 + index), + med_ids + index * axis_dim); + } +} + +template +__global__ void PermuteMediateData(const T* med_out, const int64_t* med_ids, + const int64_t* trg_idx, int64_t n, T* out, + int64_t* indices) { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if (index < n) { + out[index] = med_out[trg_idx[index]]; + indices[index] = med_ids[trg_idx[index]]; + } +} + +template +class ArgsortOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); + int axis = ctx.Attr("axis"); + + auto in_dims = input->dims(); + axis = (axis < 0) ? (in_dims.size() + axis) : axis; + + const T* in_data = input->data(); + T* out_data = output->mutable_data(ctx.GetPlace()); + int64_t* ids_data = indices->mutable_data(ctx.GetPlace()); + + int64_t numel = input->numel(); + int64_t groups = numel / in_dims[axis]; + + std::vector in_dims_vec = vectorize(in_dims); + thrust::device_vector in_dims_dev(in_dims_vec.begin(), + in_dims_vec.end()); + int64_t* in_dims_data = thrust::raw_pointer_cast(in_dims_dev.data()); + // Mediate tensor for sorting data and indices + Tensor mediate_output, mediate_indices; + T* med_out_data = + mediate_output.mutable_data(input->dims(), ctx.GetPlace()); + int64_t* med_ids_data = + mediate_indices.mutable_data(in_dims, ctx.GetPlace()); + // Target index of each element along the given axis in the mediate tensors + Tensor trg_idx_t; + int64_t* trg_idx = trg_idx_t.mutable_data(in_dims, ctx.GetPlace()); + + auto stream = ctx.cuda_device_context().stream(); + const int num_threads = PADDLE_CUDA_NUM_THREADS; + + ComputeTargetIdx<<<(numel - 1) / num_threads + 1, num_threads, 0, stream>>>( + in_dims_data, in_dims.size(), axis, numel, trg_idx, med_ids_data); + + PermuteInData<<<(numel - 1) / num_threads + 1, num_threads, 0, stream>>>( + in_data, trg_idx, numel, med_out_data); + + Sort<<<(groups - 1) / num_threads + 1, num_threads, 0, stream>>>( + in_dims[axis], groups, med_out_data, med_ids_data); + + PermuteMediateData<<<(numel - 1) / num_threads + 1, num_threads, 0, + stream>>>(med_out_data, med_ids_data, trg_idx, numel, + out_data, ids_data); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_CUDA_KERNEL(argsort, paddle::operators::ArgsortOpCUDAKernel, + paddle::operators::ArgsortOpCUDAKernel); diff --git a/paddle/fluid/operators/argsort_op.h b/paddle/fluid/operators/argsort_op.h new file mode 100644 index 0000000000..7e9112cfb7 --- /dev/null +++ b/paddle/fluid/operators/argsort_op.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include +#include "paddle/fluid/framework/op_registry.h" + +namespace paddle { +namespace operators { + +template +class ArgsortKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("X"); + auto* output = ctx.Output("Out"); + auto* indices = ctx.Output("Indices"); + int axis = ctx.Attr("axis"); + + auto in_dims = input->dims(); + axis = (axis < 0) ? (in_dims.size() + axis) : axis; + + const T* in_data = input->data(); + T* out_data = output->mutable_data(ctx.GetPlace()); + int64_t* ids_data = indices->mutable_data(ctx.GetPlace()); + + int64_t groups = input->numel() / in_dims[axis]; + int64_t stride = (axis == in_dims.size() - 1) + ? 1 + : framework::product(framework::slice_ddim( + in_dims, axis + 1, in_dims.size())); + + for (int64_t i = 0; i < groups; ++i) { + int64_t idx = i; + std::vector shape_vec(in_dims.size(), 0); + for (int64_t dim = in_dims.size() - 1; dim >= 0; --dim) { + if (dim != axis) { + shape_vec[dim] = idx % in_dims[dim]; + idx /= in_dims[dim]; + } + } + + int64_t start_index = shape_vec[0]; + for (int64_t dim = 0; dim < in_dims.size() - 1; ++dim) { + start_index = start_index * in_dims[dim + 1] + shape_vec[dim + 1]; + } + + std::vector org_index_vec(in_dims[axis], start_index); + for (int64_t j = 1; j < in_dims[axis]; ++j) { + org_index_vec[j] += j * stride; + } + + std::sort(org_index_vec.begin(), org_index_vec.end(), + [in_data](const int64_t v1, const int64_t v2) { + return in_data[v1] < in_data[v2]; + }); + + for (size_t j = 0; j < org_index_vec.size(); ++j) { + int64_t index = start_index + j * stride; + out_data[index] = in_data[org_index_vec[j]]; + ids_data[index] = (org_index_vec[j] - start_index) / stride; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/assign_value_op.cc b/paddle/fluid/operators/assign_value_op.cc index 4ad6f3443d..a757916be7 100644 --- a/paddle/fluid/operators/assign_value_op.cc +++ b/paddle/fluid/operators/assign_value_op.cc @@ -70,6 +70,7 @@ $$Out = values$$ namespace ops = paddle::operators; -REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker); +REGISTER_OPERATOR(assign_value, ops::AssignValueOp, ops::AssignValueOpMaker, + paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL(assign_value, ops::AssignValueKernel, ops::AssignValueKernel); diff --git a/paddle/fluid/operators/average_accumulates_op.cc b/paddle/fluid/operators/average_accumulates_op.cc index 25864e95d7..f389eab605 100644 --- a/paddle/fluid/operators/average_accumulates_op.cc +++ b/paddle/fluid/operators/average_accumulates_op.cc @@ -19,28 +19,28 @@ namespace operators { template <> void GetAccumulators( - const framework::ExecutionContext& ctx, int64_t* num_updates_, - int64_t* num_accumulates_, int64_t* old_num_accumulates_) { + const framework::ExecutionContext& ctx, int64_t* num_updates, + int64_t* num_accumulates, int64_t* old_num_accumulates) { auto* in_old_num_accumulates = ctx.Input("in_old_num_accumulates"); auto* in_num_accumulates = ctx.Input("in_num_accumulates"); auto* in_num_updates = ctx.Input("in_num_updates"); - *old_num_accumulates_ = in_old_num_accumulates->data()[0]; - *num_accumulates_ = in_num_accumulates->data()[0]; - *num_updates_ = in_num_updates->data()[0]; + *old_num_accumulates = in_old_num_accumulates->data()[0]; + *num_accumulates = in_num_accumulates->data()[0]; + *num_updates = in_num_updates->data()[0]; } template <> void SetAccumulators( - const framework::ExecutionContext& ctx, int64_t num_updates_, - int64_t num_accumulates_, int64_t old_num_accumulates_) { + const framework::ExecutionContext& ctx, int64_t num_updates, + int64_t num_accumulates, int64_t old_num_accumulates) { auto* out_old_num_accumulates = ctx.Output("out_old_num_accumulates"); auto* out_num_accumulates = ctx.Output("out_num_accumulates"); auto* out_num_updates = ctx.Output("out_num_updates"); - out_old_num_accumulates->data()[0] = old_num_accumulates_; - out_num_accumulates->data()[0] = num_accumulates_; - out_num_updates->data()[0] = num_updates_; + out_old_num_accumulates->data()[0] = old_num_accumulates; + out_num_accumulates->data()[0] = num_accumulates; + out_num_updates->data()[0] = num_updates; } class AverageAccumulatesOp : public framework::OperatorWithKernel { @@ -177,7 +177,7 @@ class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( AverageAccumulates Operator. -Accumulate the sum of parameter whtin sliding window. The size of sliding window is +Accumulate the sum of parameter within sliding window. The size of sliding window is determined by 'average_window', 'max_average_window' and 'min_average_window'. Memory was shared by Input(in_sum_1) and Output(out_sum_1) which acts as an accumulator 'sum_1'. 'sum_2', 'sum_3', 'num_accumulates', 'old_num_accumulates' and 'num_updates' were the same as 'sum_1'. diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h index 07ac5ced11..3958d3f685 100644 --- a/paddle/fluid/operators/average_accumulates_op.h +++ b/paddle/fluid/operators/average_accumulates_op.h @@ -54,8 +54,9 @@ class AverageAccumulatesKernel : public framework::OpKernel { float average_window = ctx.Attr("average_window"); int64_t max_average_window = ctx.Attr("max_average_window"); int64_t min_average_window = ctx.Attr("min_average_window"); - min_average_window = - std::min(min_average_window, max_average_window); + PADDLE_ENFORCE_LE(min_average_window, max_average_window, + "min_average_window shouldn't be larger than " + "max_average_window"); // Get inputs auto* param = ctx.Input("param"); diff --git a/paddle/fluid/operators/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/batch_norm_mkldnn_op.cc index 8206cc9890..9ab2179b5f 100644 --- a/paddle/fluid/operators/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/batch_norm_mkldnn_op.cc @@ -21,8 +21,6 @@ namespace operators { using batch_norm_bwd = mkldnn::batch_normalization_backward; using batch_norm_fwd = mkldnn::batch_normalization_forward; -using framework::DataLayout; -using framework::Tensor; using mkldnn::memory; using mkldnn::primitive; using mkldnn::reorder; @@ -31,18 +29,6 @@ using paddle::platform::MKLDNNDeviceContext; using paddle::platform::MKLDNNMemDesc; using platform::to_void_cast; -template -using EigenArrayMap = - Eigen::Map>; -template -using ConstEigenArrayMap = - Eigen::Map>; -template -using EigenVectorArrayMap = Eigen::Map>; -template -using ConstEigenVectorArrayMap = - Eigen::Map>; - namespace { template struct bn_type_traits { @@ -80,6 +66,7 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { const float epsilon = ctx.Attr("epsilon"); const float momentum = ctx.Attr("momentum"); const bool is_test = ctx.Attr("is_test"); + const bool fuse_with_relu = ctx.Attr("fuse_with_relu"); const auto *x = ctx.Input("X"); const auto *mean = ctx.Input("Mean"); @@ -125,11 +112,15 @@ class BatchNormMKLDNNOpKernel : public paddle::framework::OpKernel { unsigned flags = mkldnn::use_scale_shift; if (is_test) flags |= mkldnn::use_global_stats; + if (fuse_with_relu) flags |= mkldnn::fuse_bn_relu; // create mkldnn memory from input x tensor - auto src_memory = - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // create primitive descriptor for batch norm forward using bn_fwd_types = bn_type_traits; @@ -263,15 +254,21 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { using bn_bwd_types = bn_type_traits; // create mkldnn memory from input diff_y tensor - auto user_diff_dst_memory = - memory({{{diff_dst_tz}, memory::data_type::f32, diff_y->format()}, - mkldnn_engine}, - to_void_cast(diff_y_data)); + + mkldnn::memory::format dst_format = + platform::MKLDNNFormatForSize(src_tz.size(), diff_y->format()); + + auto user_diff_dst_memory = memory( + {{{diff_dst_tz}, memory::data_type::f32, dst_format}, mkldnn_engine}, + to_void_cast(diff_y_data)); // create mkldnn memory from input x tensor - auto src_memory = - memory({{{src_tz}, memory::data_type::f32, x->format()}, mkldnn_engine}, - to_void_cast(x_data)); + mkldnn::memory::format input_format = + platform::MKLDNNFormatForSize(src_tz.size(), x->format()); + + auto src_memory = memory( + {{{src_tz}, memory::data_type::f32, input_format}, mkldnn_engine}, + to_void_cast(x_data)); // for diff_dst, try to use same format as dst in forward pass auto diff_dst_pd = batch_norm_fwd_pd.get()->dst_primitive_desc(); diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 625ca2d7c4..5912a1a17c 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -22,22 +22,6 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; -using LoDTensor = framework::LoDTensor; -using DataLayout = framework::DataLayout; - -template -using EigenArrayMap = - Eigen::Map>; -template -using ConstEigenArrayMap = - Eigen::Map>; -template -using EigenVectorArrayMap = Eigen::Map>; -template -using ConstEigenVectorArrayMap = - Eigen::Map>; - class BatchNormOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -171,6 +155,9 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") .SetDefault(false); + AddAttr("fuse_with_relu", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); AddComment(R"DOC( Batch Normalization. @@ -229,6 +216,18 @@ class BatchNormKernel saved_mean_e.setZero(); saved_variance_e.setZero(); + EigenVectorArrayMap running_mean_arr( + mean_out->mutable_data(ctx.GetPlace()), C); + EigenVectorArrayMap running_var_arr( + variance_out->mutable_data(ctx.GetPlace()), C); + + if ((N * sample_size) == 1) { + LOG(WARNING) << "Only 1 element in normalization dimension, " + << "we skip the batch norm calculation, let y = x."; + framework::TensorCopySync(*x, ctx.GetPlace(), y); + return; + } + switch (data_layout) { case DataLayout::kNCHW: { ConstEigenArrayMap x_arr(x->data(), sample_size, N * C); @@ -260,10 +259,6 @@ class BatchNormKernel PADDLE_THROW("Unknown storage order: %s", data_layout_str); } - EigenVectorArrayMap running_mean_arr( - mean_out->mutable_data(ctx.GetPlace()), C); - EigenVectorArrayMap running_var_arr( - variance_out->mutable_data(ctx.GetPlace()), C); running_mean_arr = running_mean_arr * momentum + saved_mean_e * (1. - momentum); running_var_arr = @@ -440,6 +435,11 @@ class BatchNormGradKernel d_bias_arr.setZero(); d_scale_arr.setZero(); + if ((N * sample_size) == 1) { + framework::TensorCopySync(*d_y, ctx.GetPlace(), d_x); + return; + } + const auto scale_inv_var_nhw = scale_arr * inv_var_arr / (N * sample_size); switch (data_layout) { diff --git a/paddle/fluid/operators/batch_norm_op.cu.cc b/paddle/fluid/operators/batch_norm_op.cu.cc index 550dd32d36..ca6cd86693 100644 --- a/paddle/fluid/operators/batch_norm_op.cu.cc +++ b/paddle/fluid/operators/batch_norm_op.cu.cc @@ -72,6 +72,9 @@ class BatchNormKernel int N, C, H, W, D; ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); + auto *y = ctx.Output("Y"); + y->mutable_data(ctx.GetPlace()); + // ------------------- cudnn descriptors --------------------- cudnnTensorDescriptor_t data_desc_; cudnnTensorDescriptor_t bn_param_desc_; @@ -93,7 +96,7 @@ class BatchNormKernel mode_ = CUDNN_BATCHNORM_SPATIAL; #endif - VLOG(1) << "Setting descriptors."; + VLOG(3) << "Setting descriptors."; std::vector dims; std::vector strides; if (data_layout == DataLayout::kNCHW) { @@ -113,11 +116,6 @@ class BatchNormKernel const auto *scale = ctx.Input("Scale"); const auto *bias = ctx.Input("Bias"); - auto *y = ctx.Output("Y"); - - // alloc memory - y->mutable_data(ctx.GetPlace()); - auto &dev_ctx = ctx.template device_context(); auto handle = dev_ctx.cudnn_handle(); @@ -162,22 +160,28 @@ class BatchNormKernel functor(dev_ctx, saved_mean, static_cast>(0)); functor(dev_ctx, saved_variance, static_cast>(0)); - double this_factor = 1. - momentum; - - CUDNN_ENFORCE(platform::dynload::cudnnBatchNormalizationForwardTraining( - handle, mode_, CudnnDataType::kOne(), CudnnDataType::kZero(), - data_desc_, x->template data(), data_desc_, - y->template mutable_data(ctx.GetPlace()), bn_param_desc_, - scale->template data>(), - bias->template data>(), this_factor, - mean_out->template mutable_data>( - ctx.GetPlace()), - variance_out->template mutable_data>( - ctx.GetPlace()), - epsilon, saved_mean->template mutable_data>( - ctx.GetPlace()), - saved_variance->template mutable_data>( - ctx.GetPlace()))); + if ((N * H * W * D) == 1) { + LOG(WARNING) << "Only 1 element in normalization dimension, " + << "we skip the batch norm calculation, let y = x."; + framework::TensorCopySync(*x, ctx.GetPlace(), y); + } else { + double this_factor = 1. - momentum; + + CUDNN_ENFORCE(platform::dynload::cudnnBatchNormalizationForwardTraining( + handle, mode_, CudnnDataType::kOne(), CudnnDataType::kZero(), + data_desc_, x->template data(), data_desc_, + y->template mutable_data(ctx.GetPlace()), bn_param_desc_, + scale->template data>(), + bias->template data>(), this_factor, + mean_out->template mutable_data>( + ctx.GetPlace()), + variance_out->template mutable_data>( + ctx.GetPlace()), + epsilon, saved_mean->template mutable_data>( + ctx.GetPlace()), + saved_variance->template mutable_data>( + ctx.GetPlace()))); + } } // clean when exit. @@ -209,6 +213,25 @@ class BatchNormGradKernel int N, C, H, W, D; ExtractNCWHD(x_dims, data_layout, &N, &C, &H, &W, &D); + // init output + auto *d_x = ctx.Output(framework::GradVarName("X")); + auto *d_scale = ctx.Output(framework::GradVarName("Scale")); + auto *d_bias = ctx.Output(framework::GradVarName("Bias")); + + d_x->mutable_data(ctx.GetPlace()); + d_scale->mutable_data(ctx.GetPlace()); + d_bias->mutable_data(ctx.GetPlace()); + + auto &dev_ctx = ctx.template device_context(); + if ((N * H * W * D) == 1) { + framework::TensorCopySync(*d_y, ctx.GetPlace(), d_x); + math::SetConstant> + functor; + functor(dev_ctx, d_scale, static_cast>(0)); + functor(dev_ctx, d_bias, static_cast>(0)); + return; + } + PADDLE_ENFORCE_EQ(scale->dims().size(), 1UL); PADDLE_ENFORCE_EQ(scale->dims()[0], C); @@ -247,21 +270,11 @@ class BatchNormGradKernel CUDNN_ENFORCE(platform::dynload::cudnnDeriveBNTensorDescriptor( bn_param_desc_, data_desc_, mode_)); - // init output - auto *d_x = ctx.Output(framework::GradVarName("X")); - auto *d_scale = ctx.Output(framework::GradVarName("Scale")); - auto *d_bias = ctx.Output(framework::GradVarName("Bias")); - - d_x->mutable_data(ctx.GetPlace()); - d_scale->mutable_data(ctx.GetPlace()); - d_bias->mutable_data(ctx.GetPlace()); - const auto *saved_mean = ctx.Input("SavedMean"); const auto *saved_var = ctx.Input("SavedVariance"); const void *saved_mean_data = saved_mean->template data(); const void *saved_var_data = saved_var->template data(); - auto &dev_ctx = ctx.template device_context(); CUDNN_ENFORCE(platform::dynload::cudnnBatchNormalizationBackward( dev_ctx.cudnn_handle(), mode_, CudnnDataType::kOne(), CudnnDataType::kZero(), CudnnDataType::kOne(), diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index 9e5fc41598..5e3d630d68 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -19,6 +19,22 @@ limitations under the License. */ namespace paddle { namespace operators { +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DataLayout = framework::DataLayout; + +template +using EigenArrayMap = + Eigen::Map>; +template +using ConstEigenArrayMap = + Eigen::Map>; +template +using EigenVectorArrayMap = Eigen::Map>; +template +using ConstEigenVectorArrayMap = + Eigen::Map>; + template class BatchNormKernel : public framework::OpKernel { public: diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index c3dd22119d..10d678111f 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -12,8 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/beam_search_decode_op.h" +#include #include + +#include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { @@ -22,8 +24,11 @@ namespace operators { struct BeamSearchDecodeFunctor { BeamSearchDecodeFunctor(const LoDTensorArray& step_ids, const LoDTensorArray& step_scores, - LoDTensor* id_tensor, LoDTensor* score_tensor) - : step_ids_origin_(step_ids), + LoDTensor* id_tensor, LoDTensor* score_tensor, + size_t beam_size, int end_id) + : beam_size_(beam_size), + end_id_(end_id), + step_ids_origin_(step_ids), step_scores_origin_(step_scores), id_tensor_(id_tensor), score_tensor_(score_tensor) { @@ -37,9 +42,11 @@ struct BeamSearchDecodeFunctor { // Copy all tensors in the input tensor array for (auto& step_id : step_ids_origin_) { framework::LoDTensor out; - dev_ctx->Wait(); - framework::TensorCopy(step_id, platform::CPUPlace(), *dev_ctx, &out); - dev_ctx->Wait(); + if (step_id.numel() > 0) { + dev_ctx->Wait(); + framework::TensorCopy(step_id, platform::CPUPlace(), *dev_ctx, &out); + dev_ctx->Wait(); + } out.set_lod(step_id.lod()); step_ids_.push_back(out); @@ -53,9 +60,12 @@ struct BeamSearchDecodeFunctor { // Copy all tensors in the input tensor array for (auto& step_score : step_scores_origin_) { framework::LoDTensor out; - dev_ctx->Wait(); - framework::TensorCopy(step_score, platform::CPUPlace(), *dev_ctx, &out); - dev_ctx->Wait(); + if (step_score.numel() > 0) { + dev_ctx->Wait(); + framework::TensorCopy(step_score, platform::CPUPlace(), *dev_ctx, + &out); + dev_ctx->Wait(); + } out.set_lod(step_score.lod()); step_scores_.push_back(out); @@ -67,6 +77,8 @@ struct BeamSearchDecodeFunctor { void operator()() const; bool tensor_on_gpu_; + size_t beam_size_; + int end_id_; const LoDTensorArray& step_ids_origin_; const LoDTensorArray& step_scores_origin_; LoDTensorArray step_ids_ = LoDTensorArray(); @@ -77,14 +89,14 @@ struct BeamSearchDecodeFunctor { template void BeamSearchDecodeFunctor::operator()() const { - BeamSearchDecoder beam_search_decoder; + BeamSearchDecoder beam_search_decoder(beam_size_, end_id_); // Check if the tensor is on GPU. If so, use the CPU copy instead if (tensor_on_gpu_) { - beam_search_decoder.PackAllSteps(step_ids_, step_scores_, id_tensor_, - score_tensor_); + beam_search_decoder.Backtrace(step_ids_, step_scores_, id_tensor_, + score_tensor_); } else { - beam_search_decoder.PackAllSteps(step_ids_origin_, step_scores_origin_, - id_tensor_, score_tensor_); + beam_search_decoder.Backtrace(step_ids_origin_, step_scores_origin_, + id_tensor_, score_tensor_); } } @@ -122,13 +134,17 @@ class BeamSearchDecodeOp : public framework::OperatorBase { "Level of LodTensor should be 2"); } + size_t beam_size = ctx.Attr("beam_size"); + int end_id = ctx.Attr("end_id"); + // prepare output LoDTensor* sentenceIds = ctx.Output("SentenceIds"); LoDTensor* sentenceScores = ctx.Output("SentenceScores"); framework::VisitDataType( framework::ToDataType(scores->at(0).type()), - BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores)); + BeamSearchDecodeFunctor(*ids, *scores, sentenceIds, sentenceScores, + beam_size, end_id)); } }; @@ -137,18 +153,32 @@ class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("Ids", "(LodTensorArray)" - "score of the candidate words in each step"); + "The LodTensorArray containing the selected ids of all steps"); AddInput("Scores", "(LodTensorArray)" - "score of the candidate words in each step"); - AddOutput("SentenceIds", - "(LodTensor)" - "All possible result sentences of word ids"); - AddOutput("SentenceScores", - "(LodTensor)" - "All possible result sentences of word scores"); + "The LodTensorArray containing the selected scores of all steps"); + AddOutput( + "SentenceIds", + "(LodTensor)" + "An LodTensor containing all generated id sequences for all source " + "sentences"); + AddOutput( + "SentenceScores", + "(LodTensor)" + "An LodTensor containing scores corresponding to Output(SentenceIds)"); + AddAttr("beam_size", "beam size for beam search"); + AddAttr("end_id", + "the token id which indicates the end of a sequence"); AddComment(R"DOC( -Pack the result of Beam search op into SentenceIds and SentenceScores. +Beam Search Decode Operator. This Operator constructs the full hypotheses for +each source sentence by walking back along the LoDTensorArray Input(ids) +whose lods can be used to restore the path in the beam search tree. + +The Output(SentenceIds) and Output(SentenceScores) separately contain the +generated id sequences and the corresponding scores. The shapes and lods of the +two LodTensor are same. The lod level is 2 and the two levels separately +indicate how many hypotheses each source sentence has and how many ids each +hypothesis has. )DOC"); } }; @@ -172,10 +202,12 @@ class BeamSearchDecodeInferVarType : public framework::VarTypeInference { void operator()(const framework::OpDesc& op_desc, framework::BlockDesc* block) const override { for (auto& o : op_desc.Output("SentenceIds")) { - block->Var(o)->SetType(framework::proto::VarType::LOD_TENSOR); + auto& sentence_ids = block->FindRecursiveOrCreateVar(o); + sentence_ids.SetType(framework::proto::VarType::LOD_TENSOR); } for (auto& o : op_desc.Output("SentenceScores")) { - block->Var(o)->SetType(framework::proto::VarType::LOD_TENSOR); + auto& sentence_scores = block->FindRecursiveOrCreateVar(o); + sentence_scores.SetType(framework::proto::VarType::LOD_TENSOR); } } }; diff --git a/paddle/fluid/operators/beam_search_decode_op.h b/paddle/fluid/operators/beam_search_decode_op.h index 3c01f81c83..6aefc5446f 100644 --- a/paddle/fluid/operators/beam_search_decode_op.h +++ b/paddle/fluid/operators/beam_search_decode_op.h @@ -14,7 +14,9 @@ limitations under the License. */ #pragma once +#include #include + #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" @@ -25,42 +27,12 @@ using LoDTensor = framework::LoDTensor; using LoDTensorArray = framework::LoDTensorArray; // all the lod have 2 levels. -// The First is source level, the second is sentence level. -// source level describe how many candidate words for this source. -// sentence level describe these candidates belong to which prefix +// The first is source level, the second is sentence level. +// source level describe how many prefixes (branchs) for each source sentece +// (beam). sentence level describe how these candidates belong to the prefixes. const size_t kSourceLevel = 0; const size_t kSentenceLevel = 1; -template -struct BeamNode { - BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {} - - ~BeamNode() { - if (parent_) { - parent_->DropKid(this); - if (parent_->kids_.size() == 0UL) { - delete parent_; - } - } - VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_; - } - - void AppendTo(BeamNode* parent) { - parent_ = parent; - parent->kids_.insert(this); - } - - void DropKid(BeamNode* kid) { kids_.erase(kid); } - - BeamNode* parent_ = nullptr; - std::unordered_set kids_; - int64_t word_id_; - T score_; -}; - -template -using BeamNodeVector = std::vector>>; - template struct Sentence { std::vector word_ids; @@ -72,24 +44,8 @@ using SentenceVector = std::vector>; template struct BeamSearchDecoder { - /** - * make a BeamNode and all it's related prefix BeanNode into a Sentence. - */ - Sentence MakeSentence(const BeamNode* node) const; - - /** - * Param: - * cur_ids: LoDTensor of One step for word ID - * cur_scores: LoDTensor of One Step for word score - * prefixes_list: prefixes for each source sentence. - * sentence_vector_list: result sentence_vector for each source sentence. - * Return: - * a new prefixes list for each source of current step - */ - std::vector> PackTwoSteps( - const LoDTensor& cur_ids, const LoDTensor& cur_scores, - std::vector>* prefixes_list, - std::vector>* sentence_vector_list) const; + BeamSearchDecoder(size_t beam_size, int end_id) + : beam_size_(beam_size), end_id_(end_id) {} /** * convert the result sentence_vector for each source sentence into two @@ -100,107 +56,30 @@ struct BeamSearchDecoder { * sentence_vector_list: sentence_vector for each source sentence. * id_tensor: result LoDTensor for sentences of id. * score_tensor: result LoDTensor for sentences of score. + * reverse: whether ids of sentence in sentence_vector_list is reversed + * sort_by_score: whether to sort hypotheses of each sentence by scores. */ void ConvertSentenceVectorToLodTensor( std::vector> sentence_vector_list, LoDTensor* id_tensor, - LoDTensor* score_tensor) const; + LoDTensor* score_tensor, bool reverse = true, + bool sort_by_score = true) const; /** - * Pack all steps of id/score LodTensor into sentence LoDTensor - * it's main logic is: - * ```python - * prefix - * result_sentence - * result_lod_tensor - * - * for (step in steps): - * prefix = PackTwoSteps(prefix, step, &result_sentence) - * ConvertSentenceVectorToLodTensor(result_sentence, &result_lod_tensor) - * ``` + * Gather the hypotheses for each source sentence by backtrace though the + * LoDTensorArray step_ids whose lods reserve the path in the tree. */ - void PackAllSteps(const LoDTensorArray& step_ids, - const LoDTensorArray& step_scores, LoDTensor* id_tensor, - LoDTensor* score_tensor) const; -}; - -template -Sentence BeamSearchDecoder::MakeSentence(const BeamNode* node) const { - Sentence sentence; - while (node != nullptr) { - sentence.word_ids.emplace_back(node->word_id_); - sentence.scores.emplace_back(node->score_); - node = node->parent_; - } - - std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids)); - std::reverse(std::begin(sentence.scores), std::end(sentence.scores)); - - return sentence; -} - -template -std::vector> BeamSearchDecoder::PackTwoSteps( - const LoDTensor& cur_ids, const LoDTensor& cur_scores, - std::vector>* prefixes_list, - std::vector>* sentence_vector_list) const { - std::vector> result; + void Backtrace(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, LoDTensor* id_tensor, + LoDTensor* score_tensor) const; - for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1; - ++src_idx) { - size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx]; - size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1]; - - BeamNodeVector beam_nodes; - - // if prefixes size is 0, it means this is the first step. In this step, - // all candidate id is the start of candidate sentences. - if (prefixes_list->empty()) { - PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(), - cur_ids.lod().at(kSentenceLevel).back(), - "in the first step"); - for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) { - beam_nodes.push_back(std::unique_ptr>(new BeamNode( - cur_ids.data()[id_idx], cur_scores.data()[id_idx]))); - } - } else { - BeamNodeVector& prefixes = prefixes_list->at(src_idx); - SentenceVector& sentence_vector = (*sentence_vector_list)[src_idx]; - - PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(), - "prefix and candidate set number should be the same"); - - auto candidate_offset = cur_ids.lod()[kSentenceLevel]; - for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) { - std::unique_ptr>& prefix = prefixes[prefix_idx]; - size_t candidate_start = candidate_offset[src_start + prefix_idx]; - size_t candidate_end = candidate_offset[src_start + prefix_idx + 1]; - if (candidate_start == candidate_end) { - VLOG(3) << "this sentence has no more candidate, " - "add to result sentence and rm it from beam tree"; - sentence_vector.push_back(MakeSentence(prefix.get())); - prefix.reset(); - } else { - for (size_t candidate_idx = candidate_start; - candidate_idx < candidate_end; ++candidate_idx) { - auto* candidate = - new BeamNode(cur_ids.data()[candidate_idx], - cur_scores.data()[candidate_idx]); - candidate->AppendTo(prefix.get()); - beam_nodes.push_back(std::unique_ptr>(candidate)); - } - prefix.release(); - } - } - } - result.push_back(std::move(beam_nodes)); - } - return result; -} + size_t beam_size_; + int end_id_; +}; template void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( std::vector> sentence_vector_list, LoDTensor* id_tensor, - LoDTensor* score_tensor) const { + LoDTensor* score_tensor, bool reverse, bool sort_by_score) const { size_t src_num = sentence_vector_list.size(); PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0"); @@ -211,11 +90,29 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( std::vector score_data; for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + if (sort_by_score) { + sort(sentence_vector_list[src_idx].begin(), + sentence_vector_list[src_idx].end(), + [reverse](const Sentence& a, const Sentence& b) { + if (reverse) + return a.scores.front() > b.scores.front(); + else + return a.scores.back() > b.scores.back(); + }); + } for (Sentence& sentence : sentence_vector_list[src_idx]) { - id_data.insert(id_data.end(), sentence.word_ids.begin(), - sentence.word_ids.end()); - score_data.insert(score_data.end(), sentence.scores.begin(), - sentence.scores.end()); + if (reverse) { + id_data.insert(id_data.end(), sentence.word_ids.rbegin(), + sentence.word_ids.rend()); + score_data.insert(score_data.end(), sentence.scores.rbegin(), + sentence.scores.rend()); + } else { + id_data.insert(id_data.end(), sentence.word_ids.begin(), + sentence.word_ids.end()); + score_data.insert(score_data.end(), sentence.scores.begin(), + sentence.scores.end()); + } + sentence_level_lod.push_back(sentence_level_lod.back() + sentence.word_ids.size()); } @@ -243,39 +140,75 @@ void BeamSearchDecoder::ConvertSentenceVectorToLodTensor( } template -void BeamSearchDecoder::PackAllSteps(const LoDTensorArray& step_ids, - const LoDTensorArray& step_scores, - LoDTensor* id_tensor, - LoDTensor* score_tensor) const { +void BeamSearchDecoder::Backtrace(const LoDTensorArray& step_ids, + const LoDTensorArray& step_scores, + LoDTensor* id_tensor, + LoDTensor* score_tensor) const { PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0"); PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(), "step_ids and step_scores should be the same"); const size_t step_num = step_ids.size(); const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1; + std::vector> sentence_vector_list( + src_num, SentenceVector(beam_size_)); + std::vector> prefix_idx_vector_list(src_num); + for (int step_id = step_num - 1; step_id >= 0; --step_id) { + auto& cur_ids = step_ids.at(step_id); + auto& cur_scores = step_scores.at(step_id); + for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { + // for each source sentence + auto& sentence_vector = sentence_vector_list.at(src_idx); + auto& prefix_idx_vector = prefix_idx_vector_list.at(src_idx); + size_t src_prefix_start = cur_ids.lod().at(kSourceLevel)[src_idx]; + size_t src_prefix_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1]; + if (prefix_idx_vector.empty()) { // be finished and pruned at this step + // or the last time step + for (size_t prefix_idx = src_prefix_start; prefix_idx < src_prefix_end; + ++prefix_idx) { + size_t candidate_start = cur_ids.lod().at(kSentenceLevel)[prefix_idx]; + size_t candidate_end = + cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1]; + for (size_t candidate_idx = candidate_start; + candidate_idx < candidate_end; ++candidate_idx) { + prefix_idx_vector.push_back(prefix_idx); + size_t idx = prefix_idx_vector.size() - 1; + auto cur_id = cur_ids.data()[candidate_idx]; + auto cur_score = cur_scores.data()[candidate_idx]; + sentence_vector.at(idx).word_ids.push_back(cur_id); + sentence_vector.at(idx).scores.push_back(cur_score); + } + } + } else { // use prefix_idx_vector to backtrace + size_t src_candidate_start = + cur_ids.lod().at(kSentenceLevel)[src_prefix_start]; + size_t prefix_idx = src_prefix_start; + size_t candidate_num = + cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] - + cur_ids.lod().at(kSentenceLevel)[prefix_idx]; + for (size_t idx = 0; idx < prefix_idx_vector.size(); ++idx) { + auto candidate_idx = prefix_idx_vector.at(idx); + auto cur_id = cur_ids.data()[candidate_idx]; + auto cur_score = cur_scores.data()[candidate_idx]; + if (cur_id != end_id_ || sentence_vector.at(idx).word_ids.empty()) { + // to skip redundant end tokens + sentence_vector.at(idx).word_ids.push_back(cur_id); + sentence_vector.at(idx).scores.push_back(cur_score); + } - PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0"); - - // previous prefixes for each step, - // the init length is 0, means this is the first step. - std::vector> beamnode_vector_list(0); - std::vector> sentence_vector_list(src_num); - - // pack all steps for one batch first, then another batch - for (size_t step_id = 0; step_id < step_num; ++step_id) { - beamnode_vector_list = - PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id), - &beamnode_vector_list, &sentence_vector_list); - } - // append last beam_node to result - for (size_t src_idx = 0; src_idx < src_num; ++src_idx) { - for (auto& beam_node : beamnode_vector_list.at(src_idx)) { - sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get())); - beam_node.reset(); + while (src_candidate_start + candidate_num <= + candidate_idx) { // search the corresponding prefix + prefix_idx++; + candidate_num += cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] - + cur_ids.lod().at(kSentenceLevel)[prefix_idx]; + } + prefix_idx_vector.at(idx) = prefix_idx; + } + } } } ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor, - score_tensor); + score_tensor, true, true); } } // namespace operators diff --git a/paddle/fluid/operators/beam_search_decode_op_test.cc b/paddle/fluid/operators/beam_search_decode_op_test.cc index 36f9594969..88339e38d8 100644 --- a/paddle/fluid/operators/beam_search_decode_op_test.cc +++ b/paddle/fluid/operators/beam_search_decode_op_test.cc @@ -20,15 +20,11 @@ using LoD = paddle::framework::LoD; using LoDTensor = paddle::framework::LoDTensor; using LoDTensorArray = paddle::framework::LoDTensorArray; -template -using BeamNode = paddle::operators::BeamNode; template using BeamSearchDecoder = paddle::operators::BeamSearchDecoder; template using Sentence = paddle::operators::Sentence; template -using BeamNodeVector = paddle::operators::BeamNodeVector; -template using SentenceVector = paddle::operators::SentenceVector; namespace paddle { @@ -77,138 +73,50 @@ void GenerateExample(const std::vector& level_0, } // namespace test } // namespace paddle -TEST(BeamSearchDecodeOp, DeleteBeamNode) { - auto* root = new BeamNode(0, 0); - auto* b1 = new BeamNode(1, 1); - auto* b2 = new BeamNode(2, 2); - auto* b3 = new BeamNode(3, 3); - - b1->AppendTo(root); - b2->AppendTo(root); - b3->AppendTo(b1); - - delete b3; - delete b2; -} - -TEST(BeamSearchDecodeOp, MakeSentence) { - auto* root = new BeamNode(0, 0); - auto* b1 = new BeamNode(1, 1); - auto* end = new BeamNode(2, 2); - b1->AppendTo(root); - end->AppendTo(b1); - - BeamSearchDecoder helper; - Sentence sentence = helper.MakeSentence(end); - delete end; - - std::vector expect_ids = {0, 1, 2}; - ASSERT_EQ(sentence.word_ids, expect_ids); - - std::vector expect_scores = {0, 1, 2}; - ASSERT_EQ(sentence.scores, expect_scores); -} - -TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) { - CPUPlace place; - - LoDTensorArray ids; - LoDTensorArray scores; - - paddle::test::GenerateExample( - std::vector{0, 2, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, - std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); - - std::vector> beamnode_vector_list; - std::vector> sentence_vector_list( - 2, SentenceVector()); - - BeamSearchDecoder helper; - beamnode_vector_list = helper.PackTwoSteps( - ids[0], scores[0], &beamnode_vector_list, &sentence_vector_list); - ASSERT_EQ(beamnode_vector_list.size(), 2UL); - ASSERT_EQ(beamnode_vector_list[0].size(), 2UL); - ASSERT_EQ(beamnode_vector_list[1].size(), 4UL); -} - -TEST(BeamSearchDecodeOp, PackTwoSteps) { - CPUPlace place; - - // first source has three prefix - BeamNodeVector source0_prefixes; - source0_prefixes.push_back( - std::unique_ptr>(new BeamNode(1, 1))); - source0_prefixes.push_back( - std::unique_ptr>(new BeamNode(0, 0))); - source0_prefixes.push_back( - std::unique_ptr>(new BeamNode(3, 3))); - - // second source has two prefix - BeamNodeVector source1_prefixes; - source1_prefixes.push_back( - std::unique_ptr>(new BeamNode(4, 4))); - source1_prefixes.push_back( - std::unique_ptr>(new BeamNode(5, 5))); - - std::vector> beamnode_vector_list; - std::vector> sentence_vector_list( - 2, SentenceVector()); - - beamnode_vector_list.push_back(std::move(source0_prefixes)); - beamnode_vector_list.push_back(std::move(source1_prefixes)); - - // generate data for one step - LoDTensorArray ids; - LoDTensorArray scores; - - paddle::test::GenerateExample(std::vector{0, 3, 5}, - std::vector{0, 1, 1, 3, 4, 5}, - std::vector{0, 1, 2, 3, 4}, &ids, &scores); - - BeamSearchDecoder helper1; - beamnode_vector_list = helper1.PackTwoSteps( - ids[0], scores[0], &beamnode_vector_list, &sentence_vector_list); - - ASSERT_EQ(sentence_vector_list[0].size(), 1UL); - ASSERT_EQ(sentence_vector_list[1].size(), 0UL); - ASSERT_EQ(beamnode_vector_list[0].size(), 3UL); - ASSERT_EQ(beamnode_vector_list[1].size(), 2UL); -} - -TEST(BeamSearchDecodeOp, PackAllSteps) { +TEST(BeamSearchDecodeOp, Backtrace) { CPUPlace place; - // we will constuct a sample data with 3 steps and 2 source sentences + // Construct sample data with 5 steps and 2 source sentences + // beam_size = 2, start_id = 0, end_id = 1 LoDTensorArray ids; LoDTensorArray scores; paddle::test::GenerateExample( - std::vector{0, 3, 6}, std::vector{0, 1, 2, 3, 4, 5, 6}, - std::vector{1, 2, 3, 4, 5, 6}, &ids, &scores); + std::vector{0, 1, 2}, std::vector{0, 1, 2}, + std::vector{0, 0}, &ids, &scores); // start with start_id + paddle::test::GenerateExample(std::vector{0, 1, 2}, + std::vector{0, 2, 4}, + std::vector{2, 3, 4, 5}, &ids, &scores); + paddle::test::GenerateExample(std::vector{0, 2, 4}, + std::vector{0, 2, 2, 4, 4}, + std::vector{3, 1, 5, 4}, &ids, &scores); + paddle::test::GenerateExample(std::vector{0, 2, 4}, + std::vector{0, 1, 2, 3, 4}, + std::vector{1, 1, 3, 5}, &ids, &scores); paddle::test::GenerateExample( - std::vector{0, 3, 6}, std::vector{0, 1, 1, 3, 5, 5, 6}, - std::vector{0, 1, 2, 3, 4, 5}, &ids, &scores); - paddle::test::GenerateExample(std::vector{0, 3, 6}, - std::vector{0, 0, 1, 2, 3, 4, 5}, - std::vector{0, 1, 2, 3, 4}, &ids, &scores); + std::vector{0, 2, 4}, + std::vector{0, 0, 0, 2, + 2}, // the branchs of the first source sentence + // are pruned since finished + std::vector{5, 1}, + &ids, &scores); - ASSERT_EQ(ids.size(), 3UL); - ASSERT_EQ(scores.size(), 3UL); + ASSERT_EQ(ids.size(), 5UL); + ASSERT_EQ(scores.size(), 5UL); - BeamSearchDecoder helper; + BeamSearchDecoder helper(2, 1); // beam_size = 2, end_id = 1 LoDTensor id_tensor; LoDTensor score_tensor; - helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor); + helper.Backtrace(ids, scores, &id_tensor, &score_tensor); LoD lod = id_tensor.lod(); - std::vector expect_source_lod = {0, 4, 8}; + std::vector expect_source_lod = {0, 2, 4}; EXPECT_EQ(lod[0], expect_source_lod); - std::vector expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19}; + std::vector expect_sentence_lod = {0, 4, 7, 12, 17}; EXPECT_EQ(lod[1], expect_sentence_lod); - // 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4 - std::vector expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5, - 4, 3, 2, 4, 4, 3, 6, 5, 4}; + std::vector expect_data = {0, 2, 3, 1, 0, 2, 1, 0, 4, + 5, 3, 5, 0, 4, 5, 3, 1}; ASSERT_EQ(id_tensor.dims()[0], static_cast(expect_data.size())); for (size_t i = 0; i < expect_data.size(); ++i) { ASSERT_EQ(id_tensor.data()[i], diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc index df0b50881f..62771d09f1 100644 --- a/paddle/fluid/operators/beam_search_op.cc +++ b/paddle/fluid/operators/beam_search_op.cc @@ -12,25 +12,26 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/beam_search_op.h" - #include #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/beam_search_op.h" namespace paddle { namespace operators { void BeamSearch::operator()(const framework::LoDTensor &pre_ids, + const framework::LoDTensor &pre_scores, framework::LoDTensor *selected_ids, framework::LoDTensor *selected_scores) { auto abs_lod = framework::ToAbsOffset(ids_->lod()); auto &high_level = abs_lod[lod_level_]; - auto items = SelectTopBeamSizeItems(); + auto items = SelectTopBeamSizeItems(pre_ids, pre_scores); auto selected_items = ToMap(items, high_level.back()); VLOG(3) << "selected_items:"; for (size_t i = 0; i < selected_items.size(); ++i) { @@ -39,7 +40,8 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, VLOG(3) << ItemToString(item); } } - PruneEndidCandidates(pre_ids, &selected_items); + + PruneEndBeams(pre_ids, &selected_items); // calculate the output tensor's height size_t num_instances = std::accumulate( std::begin(selected_items), std::end(selected_items), 0, @@ -61,12 +63,6 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, size_t low_offset = 0; for (auto &items : selected_items) { low_level.push_back(low_offset); - sort(items.begin(), items.end(), [](const Item &a, const Item &b) { - if (a.offset < b.offset) { - return true; - } - return a.id < b.id; - }); for (auto &item : items) { ids_data[low_offset] = item.id; scores_data[low_offset] = item.score; @@ -86,21 +82,31 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, selected_scores->set_lod(lod); } -int BeamSearch::PruneEndidCandidates(const framework::LoDTensor &pre_ids, - std::vector> *items) { +void BeamSearch::PruneEndBeams(const framework::LoDTensor &pre_ids, + std::vector> *items) { auto *pre_ids_data = pre_ids.data(); - - int res = 0; - for (size_t offset = 0; offset < items->size(); offset++) { - auto prefix_id = pre_ids_data[offset]; - if (prefix_id == end_id_) { - items->at(offset).clear(); - } else { - res++; + auto abs_lod = framework::ToAbsOffset(ids_->lod()); + auto &high_level = abs_lod[lod_level_]; + for (size_t src_idx = 0; src_idx < high_level.size() - 1; ++src_idx) { + size_t src_prefix_start = high_level[src_idx]; + size_t src_prefix_end = high_level[src_idx + 1]; + bool finish_flag = true; + for (size_t offset = src_prefix_start; offset < src_prefix_end; offset++) { + for (auto &item : items->at(offset)) { + if (item.id != static_cast(end_id_) || + pre_ids_data[offset] != end_id_) { + finish_flag = false; + break; + } + } + if (!finish_flag) break; + } + if (finish_flag) { // all branchs of the beam (source sentence) end and + // prune this beam + for (size_t offset = src_prefix_start; offset < src_prefix_end; offset++) + items->at(offset).clear(); } } - - return res; } std::vector> BeamSearch::ToMap( @@ -115,19 +121,17 @@ std::vector> BeamSearch::ToMap( return result; } -std::vector> -BeamSearch::SelectTopBeamSizeItems() { +std::vector> BeamSearch::SelectTopBeamSizeItems( + const framework::LoDTensor &pre_ids, + const framework::LoDTensor &pre_scores) { std::vector> result; std::vector items; // for each source sentence, select the top beam_size items across all // candidate sets. - while (NextItemSet(&items)) { - std::nth_element(std::begin(items), std::begin(items) + beam_size_, - std::end(items), [](const Item &a, const Item &b) { - // TODO(superjom) make score's comparation customizable. - // partial sort in descending order - return a.score > b.score; - }); + while (NextItemSet(pre_ids, pre_scores, &items)) { + std::nth_element( + std::begin(items), std::begin(items) + beam_size_, std::end(items), + [](const Item &a, const Item &b) { return a.score > b.score; }); // prune the top beam_size items. if (items.size() > beam_size_) { items.resize(beam_size_); @@ -146,7 +150,9 @@ BeamSearch::SelectTopBeamSizeItems() { } // the candidates of a source -bool BeamSearch::NextItemSet(std::vector *items) { +bool BeamSearch::NextItemSet(const framework::LoDTensor &pre_ids, + const framework::LoDTensor &pre_scores, + std::vector *items) { if (sent_offset_ >= ids_->NumElements(lod_level_)) { return false; } @@ -164,14 +170,24 @@ bool BeamSearch::NextItemSet(std::vector *items) { instance_dim *= ids.dims()[i]; } + auto *pre_ids_data = pre_ids.data(); + auto *pre_scores_data = pre_scores.data(); items->clear(); items->reserve(framework::product(ids.dims())); for (size_t offset = abs_lod[lod_level_][sent_offset_]; offset < abs_lod[lod_level_][sent_offset_ + 1]; offset++) { - for (size_t d = 0; d < instance_dim; d++) { - const size_t dim_offset = offset * instance_dim + d; - items->emplace_back(offset, ids_data[dim_offset], - scores_data[dim_offset]); + auto pre_id = pre_ids_data[offset]; + auto pre_score = pre_scores_data[offset]; + if (pre_id == end_id_) { + // Allocate all probability mass to eos_id for finished branchs and the + // other candidate ids can be ignored. + items->emplace_back(offset, end_id_, pre_score); + } else { + for (size_t d = 0; d < instance_dim; d++) { + const size_t dim_offset = offset * instance_dim + d; + items->emplace_back(offset, ids_data[dim_offset], + scores_data[dim_offset]); + } } } @@ -199,15 +215,27 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { // inputs and outputs stored in proto - AddInput("pre_ids", "ids in previous step"); - AddInput("ids", "a LoDTensor of shape of [None,k]"); + AddInput("pre_ids", + "(LoDTensor) The LoDTensor containing the selected ids at the " + "previous step. It should be a tensor with shape (batch_size, 1) " + "and lod `[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at " + "thefirst step."); + AddInput("pre_scores", + "(LoDTensor) The LoDTensor containing the accumulated " + "scores corresponding to the selected ids at the previous step."); + AddInput("ids", + "(LoDTensor) The LoDTensor containing the candidates ids. Its " + "shape should be (batch_size * beam_size, K), where K supposed to " + "be beam_size."); AddInput("scores", - "a LoDTensor that has the same shape and LoD with `ids`"); + "(LoDTensor) The LodTensor containing the accumulated scores " + "corresponding to Input(ids) and its shape is the same as the " + "shape of Input(ids)."); AddOutput("selected_ids", - "a LoDTensor that stores the IDs selected by beam search"); - AddOutput( - "selected_scores", - "a LoDTensor that has the same shape and LoD with `selected_ids`"); + "A LodTensor that stores the IDs selected by beam search."); + AddOutput("selected_scores", + "A LoDTensor containing the accumulated scores corresponding to " + "Output(selected_ids)."); // Attributes stored in AttributeMap AddAttr("level", "the level of LoDTensor"); @@ -215,8 +243,21 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("end_id", "the token id which indicates the end of a sequence"); - AddComment( - "This is a beam search operator that help to generate sequences."); + AddComment(R"DOC( +This operator does the search in beams for one time step. +Specifically, it selects the top-K candidate word ids of current step from +Input(ids) according to their Input(scores) for all source sentences, +where K is Attr(beam_size) and Input(ids), Input(scores) are predicted results +from the computation cell. Additionally, Input(pre_ids) and Input(pre_scores) +are the output of beam_search at previous step, they are needed for special use +to handle ended candidate translations. The paths linking prefixes and selected +candidates are organized and reserved in lod. + +Note that the Input(scores) passed in should be accumulated scores, and +length penalty should be done with extra operators before calculating the +accumulated scores if needed, also suggest finding top-K before it and +using the top-K candidates following. +)DOC"); } }; @@ -253,10 +294,12 @@ class BeamSearchInferVarType : public framework::VarTypeInference { void operator()(const framework::OpDesc &op_desc, framework::BlockDesc *block) const override { for (auto &o : op_desc.Output("selected_ids")) { - block->Var(o)->SetType(framework::proto::VarType::LOD_TENSOR); + auto &selected_ids = block->FindRecursiveOrCreateVar(o); + selected_ids.SetType(framework::proto::VarType::LOD_TENSOR); } for (auto &o : op_desc.Output("selected_scores")) { - block->Var(o)->SetType(framework::proto::VarType::LOD_TENSOR); + auto &selected_scores = block->FindRecursiveOrCreateVar(o); + selected_scores.SetType(framework::proto::VarType::LOD_TENSOR); } } }; diff --git a/paddle/fluid/operators/beam_search_op.h b/paddle/fluid/operators/beam_search_op.h index 46bc4f6f93..b5e2ed0592 100644 --- a/paddle/fluid/operators/beam_search_op.h +++ b/paddle/fluid/operators/beam_search_op.h @@ -132,6 +132,7 @@ class BeamSearch { * that means no candidates is provided, and the task will stop running. */ void operator()(const framework::LoDTensor& pre_ids, + const framework::LoDTensor& pre_scores, framework::LoDTensor* selected_ids, framework::LoDTensor* selected_scores); /* @@ -153,14 +154,16 @@ class BeamSearch { protected: /* - * Delete all the records that follows the end token. + * Prune the source sentences all branchs finished, and it is optional. + * Pruning must one step later than finishing (thus pre_ids is needed here), + * since the end tokens must be writed out. */ - int PruneEndidCandidates(const framework::LoDTensor& pre_ids, - std::vector>* items); + void PruneEndBeams(const framework::LoDTensor& pre_ids, + std::vector>* items); /* * Transform the items into a map whose key is offset, value is the items. - * NOTE low performance + * NOTE low performance. */ std::vector> ToMap( const std::vector>& inputs, size_t element_num); @@ -168,12 +171,16 @@ class BeamSearch { /* * For each source, select top beam_size records. */ - std::vector> SelectTopBeamSizeItems(); + std::vector> SelectTopBeamSizeItems( + const framework::LoDTensor& pre_ids, + const framework::LoDTensor& pre_scores); /* * Get the items of next source sequence, return false if no remaining items. */ - bool NextItemSet(std::vector* items); + bool NextItemSet(const framework::LoDTensor& pre_ids, + const framework::LoDTensor& pre_scores, + std::vector* items); private: size_t beam_size_; @@ -192,24 +199,25 @@ template class BeamSearchOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* ids_var = context.Input("ids"); - auto* scores_var = context.Input("scores"); - auto* pre_ids_var = context.Input("pre_ids"); - PADDLE_ENFORCE_NOT_NULL(ids_var); - PADDLE_ENFORCE_NOT_NULL(scores_var); - PADDLE_ENFORCE_NOT_NULL(pre_ids_var); + auto* ids = context.Input("ids"); + auto* scores = context.Input("scores"); + auto* pre_ids = context.Input("pre_ids"); + auto* pre_scores = context.Input("pre_scores"); + PADDLE_ENFORCE_NOT_NULL(ids); + PADDLE_ENFORCE_NOT_NULL(scores); + PADDLE_ENFORCE_NOT_NULL(pre_ids); + PADDLE_ENFORCE_NOT_NULL(pre_scores); size_t level = context.Attr("level"); size_t beam_size = context.Attr("beam_size"); int end_id = context.Attr("end_id"); - BeamSearch alg(*ids_var, *scores_var, level, beam_size, end_id); - auto selected_ids_var = - context.Output("selected_ids"); - auto selected_scores_var = + BeamSearch alg(*ids, *scores, level, beam_size, end_id); + auto selected_ids = context.Output("selected_ids"); + auto selected_scores = context.Output("selected_scores"); - PADDLE_ENFORCE_NOT_NULL(selected_ids_var); - PADDLE_ENFORCE_NOT_NULL(selected_scores_var); - alg(*pre_ids_var, selected_ids_var, selected_scores_var); + PADDLE_ENFORCE_NOT_NULL(selected_ids); + PADDLE_ENFORCE_NOT_NULL(selected_scores); + alg(*pre_ids, *pre_scores, selected_ids, selected_scores); } }; } // namespace operators diff --git a/paddle/fluid/operators/beam_search_op_test.cc b/paddle/fluid/operators/beam_search_op_test.cc index ec666359aa..c4f4b478fb 100644 --- a/paddle/fluid/operators/beam_search_op_test.cc +++ b/paddle/fluid/operators/beam_search_op_test.cc @@ -30,7 +30,7 @@ using std::endl; void CreateInput(LoDTensor* ids, LoDTensor* scores) { LoD lod; - vector level0({0, 1, 4}); + vector level0({0, 2, 4}); vector level1({0, 1, 2, 3, 4}); lod.push_back(level0); lod.push_back(level1); @@ -64,17 +64,22 @@ TEST(beam_search_op, run) { for (int i = 0; i < 4; i++) { pre_ids.mutable_data(place)[i] = i + 1; } + LoDTensor pre_scores; + pre_scores.Resize(framework::make_ddim(vector(4, 1))); + for (int i = 0; i < 4; i++) { + pre_scores.mutable_data(place)[i] = 0.1 * (i + 1); + } - BeamSearch beamsearch(ids, scores, (int64_t)0, (int64_t)2, 0); + BeamSearch beamsearch(ids, scores, (size_t)0, (size_t)2, 0); LoDTensor sids, sscores; - beamsearch(pre_ids, &sids, &sscores); + beamsearch(pre_ids, pre_scores, &sids, &sscores); LOG(INFO) << "score: " << sscores << endl; ASSERT_EQ(sids.lod(), sscores.lod()); - vector tids({2, 4, 3, 8}); - vector tscores({0.3, 0.5, 0.9, 0.7}); + vector tids({4, 2, 3, 8}); + vector tscores({0.5, 0.6, 0.9, 0.7}); for (int i = 0; i < 4; i++) { ASSERT_EQ(tids[i], sids.data()[i]); diff --git a/paddle/fluid/operators/bilinear_interp_op.cc b/paddle/fluid/operators/bilinear_interp_op.cc index 2572e813d6..2dc3399da1 100644 --- a/paddle/fluid/operators/bilinear_interp_op.cc +++ b/paddle/fluid/operators/bilinear_interp_op.cc @@ -110,6 +110,7 @@ REGISTER_OPERATOR(bilinear_interp, ops::BilinearInterpOp, ops::BilinearInterpOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(bilinear_interp_grad, ops::BilinearInterpOpGrad); -REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel); +REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel, + ops::BilinearInterpKernel); REGISTER_OP_CPU_KERNEL(bilinear_interp_grad, ops::BilinearInterpGradKernel); diff --git a/paddle/fluid/operators/bilinear_interp_op.h b/paddle/fluid/operators/bilinear_interp_op.h index 8b03cd5a06..70847cb8c1 100644 --- a/paddle/fluid/operators/bilinear_interp_op.h +++ b/paddle/fluid/operators/bilinear_interp_op.h @@ -46,8 +46,10 @@ class BilinearInterpKernel : public framework::OpKernel { int in_chw = channels * in_hw; int out_chw = channels * out_hw; - T ratio_h = (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - T ratio_w = (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = + (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; + float ratio_w = + (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; if (in_h == out_h && in_w == out_w) { memcpy(output, input, input_t->numel() * sizeof(T)); @@ -56,24 +58,24 @@ class BilinearInterpKernel : public framework::OpKernel { for (int i = 0; i < out_h; ++i) { // loop for images int h = ratio_h * i; int hid = (h < in_h - 1) ? 1 : 0; - T h1lambda = ratio_h * i - h; - T h2lambda = 1 - h1lambda; + float h1lambda = ratio_h * i - h; + float h2lambda = 1.f - h1lambda; for (int j = 0; j < out_w; ++j) { int w = ratio_w * j; int wid = (w < in_w - 1) ? 1 : 0; - T w1lambda = ratio_w * j - w; - T w2lambda = 1 - w1lambda; + float w1lambda = ratio_w * j - w; + float w2lambda = 1.f - w1lambda; // calculate four position for bilinear interpolation const T* in_pos = &input[k * in_chw + h * in_w + w]; T* out_pos = &output[k * out_chw + i * out_w + j]; for (int c = 0; c < channels; ++c) { // loop for channels // bilinear interpolation - out_pos[0] = + out_pos[0] = static_cast( h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) + h1lambda * (w2lambda * in_pos[hid * in_w] + - w1lambda * in_pos[hid * in_w + wid]); + w1lambda * in_pos[hid * in_w + wid])); in_pos += in_hw; out_pos += out_hw; } @@ -117,8 +119,10 @@ class BilinearInterpGradKernel : public framework::OpKernel { int in_chw = channels * in_hw; int out_chw = channels * out_hw; - T ratio_h = (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; - T ratio_w = (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; + float ratio_h = + (out_h > 1) ? static_cast(in_h - 1) / (out_h - 1) : 0.f; + float ratio_w = + (out_w > 1) ? static_cast(in_w - 1) / (out_w - 1) : 0.f; if (in_h == out_h && in_w == out_w) { memcpy(d_input, d_output, d_input_t->numel() * sizeof(T)); @@ -127,22 +131,24 @@ class BilinearInterpGradKernel : public framework::OpKernel { for (int i = 0; i < out_h; ++i) { // loop for images int h = ratio_h * i; int hid = (h < in_h - 1) ? 1 : 0; - T h1lambda = ratio_h * i - h; - T h2lambda = 1 - h1lambda; + float h1lambda = ratio_h * i - h; + float h2lambda = 1 - h1lambda; for (int j = 0; j < out_w; ++j) { int w = ratio_w * j; int wid = (w < in_w - 1) ? 1 : 0; - T w1lambda = ratio_w * j - w; - T w2lambda = 1 - w1lambda; + float w1lambda = ratio_w * j - w; + float w2lambda = 1 - w1lambda; T* in_pos = &d_input[k * in_chw + h * in_w + w]; const T* out_pos = &d_output[k * out_chw + i * out_w + j]; for (int c = 0; c < channels; ++c) { // loop for channels - in_pos[0] += h2lambda * w2lambda * out_pos[0]; - in_pos[wid] += h2lambda * w1lambda * out_pos[0]; - in_pos[hid * in_w] += h1lambda * w2lambda * out_pos[0]; - in_pos[hid * in_w + wid] += h1lambda * w1lambda * out_pos[0]; + in_pos[0] += static_cast(h2lambda * w2lambda * out_pos[0]); + in_pos[wid] += static_cast(h2lambda * w1lambda * out_pos[0]); + in_pos[hid * in_w] += + static_cast(h1lambda * w2lambda * out_pos[0]); + in_pos[hid * in_w + wid] += + static_cast(h1lambda * w1lambda * out_pos[0]); in_pos += in_hw; out_pos += out_hw; } diff --git a/paddle/fluid/operators/checkpoint_notify_op.cc b/paddle/fluid/operators/checkpoint_notify_op.cc new file mode 100644 index 0000000000..c4219a429a --- /dev/null +++ b/paddle/fluid/operators/checkpoint_notify_op.cc @@ -0,0 +1,88 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include // NOLINT +#include + +#include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/detail/macros.h" +#include "paddle/fluid/operators/send_recv_util.h" +#include "paddle/fluid/string/printf.h" + +namespace paddle { +namespace operators { + +class CheckpointNotifyOp : public framework::OperatorBase { + public: + CheckpointNotifyOp(const std::string& type, + const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + + void RunImpl(const framework::Scope& scope, + const platform::Place& place) const override { + std::vector epmap = Attr>("epmap"); + std::string dir = Attr("dir"); + std::string lookup_table_name = Attr("lookup_table"); + + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); + for (size_t i = 0; i < epmap.size(); i++) { + auto lookup_table_save_dir = + string::Sprintf("%s/%s_%d", dir, lookup_table_name, i); + rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir); + VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name + << " and dir:" << dir << " to " << epmap[i]; + } + rpc_client->Wait(); + } +}; + +class CheckpointNotifyOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() { + AddAttr>("epmap", + "(string vector, default 127.0.0.1:6164)" + "Parameter Server endpoints in the order") + .SetDefault({"127.0.0.1:6164"}); + AddAttr( + "dir", "(string, default '') indicate the folder checkpoint will use"); + AddAttr("lookup_table", + "(string, default '') the lookup table name"); + AddComment(R"DOC( +CheckpointNotify operator + +This operator will send lookup table and it's checkpoint direcoty to listen_and_serve op at +the parameter server. +)DOC"); + } +}; + +class CheckpointNotifyOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext* ctx) const override {} +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(checkpoint_notify, ops::CheckpointNotifyOp, + paddle::framework::EmptyGradOpMaker, + ops::CheckpointNotifyOpMaker, + ops::CheckpointNotifyOpShapeInference); diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc index 62636bb2f9..dc43c69be0 100644 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ b/paddle/fluid/operators/chunk_eval_op.cc @@ -91,32 +91,31 @@ class ChunkEvalOpMaker : public framework::OpProtoAndCheckerMaker { "(int64_t). The number of chunks both in Inference and Label on the " "given mini-batch."); AddAttr("num_chunk_types", - "(int). The number of chunk type. See below for details."); - AddAttr( - "chunk_scheme", - "(string, default IOB). The labeling scheme indicating " - "how to encode the chunks. Must be IOB, IOE, IOBES or plain. See below " - "for details.") + "The number of chunk type. See the description for details."); + AddAttr("chunk_scheme", + "The labeling scheme indicating " + "how to encode the chunks. Must be IOB, IOE, IOBES or " + "plain. See the description" + "for details.") .SetDefault("IOB"); AddAttr>("excluded_chunk_types", - "(list) A list including chunk type ids " + "A list including chunk type ids " "indicating chunk types that are not counted. " - "See below for details.") + "See the description for details.") .SetDefault(std::vector{}); AddComment(R"DOC( For some basics of chunking, please refer to -‘Chunking with Support Vector Machines ’. +'Chunking with Support Vector Machines '. - -CheckEvalOp computes the precision, recall, and F1-score of chunk detection, +ChunkEvalOp computes the precision, recall, and F1-score of chunk detection, and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. Here is a NER example of labeling for these tagging schemes: - - Li Ming works at Agricultural Bank of China in Beijing. - IO: I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC - IOB: B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC - IOE: I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC - IOBES: B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC + + Li Ming works at Agricultural Bank of China in Beijing. + IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC + IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC + IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC + IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC There are three chunk types(named entity types) including PER(person), ORG(organization) and LOC(LOCATION), and we can see that the labels have the form -. @@ -124,31 +123,31 @@ and LOC(LOCATION), and we can see that the labels have the form -("force_cpu", - "(bool, default false) Force fill output variable to cpu " + "Force fill output variable to cpu " "memory. Otherwise, fill output variable to the running " - "device") - .SetDefault(false); - AddOutput("Out", string::Sprintf( - "(LoDTensor) n-dim bool tensor. Each element is %s", - comment.equation)); - AddComment(string::Sprintf(R"DOC(%s Operator - + "device [default true].") + .SetDefault(true); + AddOutput("Out", string::Sprintf("n-dim bool tensor. Each element is %s", + comment.equation)); + AddComment(string::Sprintf(R"DOC( It operates element-wise on X and Y, and returns the Out. Each of them is a N-dim tensor. X and Y could be any type. The each element of the Out tensor is -calculated by %s +calculated by $%s$ )DOC", - comment.type, comment.equation)); - AddAttr("axis", - "(int, default -1). The start dimension index " - "for broadcasting Y onto X.") + comment.equation)); + AddAttr( + "axis", + "The start dimension index for broadcasting Y onto X. [default -1]") .SetDefault(-1) .EqualGreaterThan(-1); } diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h index 1b1b8bf5ed..a496301526 100644 --- a/paddle/fluid/operators/concat_op.h +++ b/paddle/fluid/operators/concat_op.h @@ -60,34 +60,45 @@ template class ConcatGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const { - auto* in = ctx.Input(framework::GradVarName("Out")); + auto* out_grad = + ctx.Input(framework::GradVarName("Out")); + auto ins = ctx.MultiInput("X"); + auto out_var_names = ctx.Outputs(framework::GradVarName("X")); auto outs = ctx.MultiOutput(framework::GradVarName("X")); int64_t axis = static_cast(ctx.Attr("axis")); + // get output tensor that the name is not kEmptyVarName + std::vector outputs; + for (size_t j = 0; j < outs.size(); ++j) { + if (out_var_names[j] != framework::kEmptyVarName) { + outs[j]->mutable_data(ctx.GetPlace()); + outputs.push_back(outs[j]); + } else { + outputs.push_back(nullptr); + } + } + // Sometimes direct copies will be faster, this maybe need deeply analysis. if (axis == 0 && outs.size() < 10) { size_t input_offset = 0; - auto in_stride = framework::stride_numel(in->dims()); + const auto in_stride = framework::stride_numel(out_grad->dims()); - for (auto& out : outs) { - out->mutable_data(ctx.GetPlace()); - auto out_stride = framework::stride_numel(out->dims()); - StridedNumelCopyWithAxis(ctx.device_context(), axis, out->data(), - out_stride, in->data() + input_offset, - in_stride, out_stride[axis]); + for (size_t i = 0; i < outs.size(); ++i) { + auto out_stride = framework::stride_numel(ins[i]->dims()); + auto* out = outputs[i]; + if (out != nullptr) { + StridedNumelCopyWithAxis( + ctx.device_context(), axis, out->data(), out_stride, + out_grad->data() + input_offset, in_stride, out_stride[axis]); + } input_offset += out_stride[axis]; } } else { - std::vector outputs(outs.size()); - for (size_t j = 0; j < outs.size(); ++j) { - outs[j]->mutable_data(ctx.GetPlace()); - outputs[j] = *outs[j]; - } - auto& dev_ctx = ctx.template device_context(); paddle::operators::math::ConcatGradFunctor concat_grad_functor; - concat_grad_functor(dev_ctx, *in, static_cast(axis), &outputs); + concat_grad_functor(dev_ctx, *out_grad, ins, static_cast(axis), + &outputs); } } }; diff --git a/paddle/fluid/operators/conditional_block_op.cc b/paddle/fluid/operators/conditional_block_op.cc index 5984f80d04..580fde7538 100644 --- a/paddle/fluid/operators/conditional_block_op.cc +++ b/paddle/fluid/operators/conditional_block_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/var_type.h" namespace paddle { namespace operators { @@ -47,7 +48,7 @@ class ConditionalOp : public framework::OperatorBase { if (!(ips.size() == 1UL && ips[0]->IsInitialized())) { PADDLE_THROW("should have one initialized input as condition"); } - if (!(ips[0]->type().hash_code() == typeid(bool).hash_code() && // NOLINT + if (!(framework::IsType(ips[0]->type()) && // NOLINT ips[0]->numel() == 1)) { PADDLE_THROW( "condition input's data type should be bool, " @@ -204,9 +205,10 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase { context->SetOutputsDim(framework::GradVarName("Params"), context->GetInputsDim("Params")); } - PADDLE_ENFORCE(context->HasOutputs(framework::GradVarName("X"))); - context->SetOutputsDim(framework::GradVarName("X"), - context->GetInputsDim("X")); + if (context->HasOutputs(framework::GradVarName("X"))) { + context->SetOutputsDim(framework::GradVarName("X"), + context->GetInputsDim("X")); + } } }; diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index 0b363f5c43..eeb98ee44f 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -156,7 +156,7 @@ Parameters(strides, paddings) are two elements. These two elements represent hei and width, respectively. The input(X) size and output(Out) size may be different. -Example: +For an example: Input: Input shape: $(N, C_{in}, H_{in}, W_{in})$ Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ @@ -302,6 +302,7 @@ framework::OpKernelType ConvTransposeOpGrad::GetExpectedKernelType( namespace ops = paddle::operators; +// conv2d_transpose REGISTER_OPERATOR(conv2d_transpose, ops::ConvTransposeOp, ops::Conv2DTransposeOpMaker, paddle::framework::DefaultGradOpDescMaker); @@ -317,6 +318,7 @@ REGISTER_OP_CPU_KERNEL( ops::GemmConvTransposeGradKernel); +// conv3d_transpose REGISTER_OPERATOR(conv3d_transpose, ops::ConvTransposeOp, ops::Conv3DTransposeOpMaker, paddle::framework::DefaultGradOpDescMaker); @@ -331,3 +333,19 @@ REGISTER_OP_CPU_KERNEL( ops::GemmConvTransposeGradKernel, ops::GemmConvTransposeGradKernel); + +// depthwise conv2d_transpose +REGISTER_OPERATOR(depthwise_conv2d_transpose, ops::ConvTransposeOp, + ops::Conv2DTransposeOpMaker, + paddle::framework::DefaultGradOpDescMaker); +REGISTER_OPERATOR(depthwise_conv2d_transpose_grad, ops::ConvTransposeOpGrad); + +REGISTER_OP_CPU_KERNEL( + depthwise_conv2d_transpose, + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); +REGISTER_OP_CPU_KERNEL( + depthwise_conv2d_transpose_grad, + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); diff --git a/paddle/fluid/operators/conv_transpose_op.cu.cc b/paddle/fluid/operators/conv_transpose_op.cu.cc index 640fa7d14a..a6d5665df8 100644 --- a/paddle/fluid/operators/conv_transpose_op.cu.cc +++ b/paddle/fluid/operators/conv_transpose_op.cu.cc @@ -15,25 +15,28 @@ limitations under the License. */ #include "paddle/fluid/operators/conv_transpose_op.h" namespace ops = paddle::operators; +using CUDA = paddle::platform::CUDADeviceContext; -REGISTER_OP_CUDA_KERNEL( - conv2d_transpose, - ops::GemmConvTransposeKernel, - ops::GemmConvTransposeKernel); -REGISTER_OP_CUDA_KERNEL( - conv2d_transpose_grad, - ops::GemmConvTransposeGradKernel, - ops::GemmConvTransposeGradKernel); - -REGISTER_OP_CUDA_KERNEL( - conv3d_transpose, - ops::GemmConvTransposeKernel, - ops::GemmConvTransposeKernel); -REGISTER_OP_CUDA_KERNEL( - conv3d_transpose_grad, - ops::GemmConvTransposeGradKernel, - ops::GemmConvTransposeGradKernel); +// conv2d +REGISTER_OP_CUDA_KERNEL(conv2d_transpose, + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); +REGISTER_OP_CUDA_KERNEL(conv2d_transpose_grad, + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); + +// conv3d +REGISTER_OP_CUDA_KERNEL(conv3d_transpose, + ops::GemmConvTransposeKernel, + ops::GemmConvTransposeKernel); +REGISTER_OP_CUDA_KERNEL(conv3d_transpose_grad, + ops::GemmConvTransposeGradKernel, + ops::GemmConvTransposeGradKernel); + +// depthwise conv2d +REGISTER_OP_CUDA_KERNEL(depthwise_conv2d_transpose, + ops::DepthwiseConvTransposeKernel, + ops::DepthwiseConvTransposeKernel); +REGISTER_OP_CUDA_KERNEL(depthwise_conv2d_transpose_grad, + ops::DepthwiseConvTransposeGradKernel, + ops::DepthwiseConvTransposeGradKernel); diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h index 1dcfc651fd..0d9c6a62fe 100644 --- a/paddle/fluid/operators/conv_transpose_op.h +++ b/paddle/fluid/operators/conv_transpose_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/depthwise_conv.h" #include "paddle/fluid/operators/math/im2col.h" #include "paddle/fluid/operators/math/vol2col.h" @@ -316,5 +317,74 @@ class GemmConvTransposeGradKernel : public framework::OpKernel { } } }; + +template +class DepthwiseConvTransposeKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* input = context.Input("Input"); + Tensor filter = *context.Input("Filter"); + Tensor* output = context.Output("Output"); + output->mutable_data(context.GetPlace()); + + int groups = context.Attr("groups"); + PADDLE_ENFORCE_EQ(groups, filter.dims()[0]); + + std::vector strides = context.Attr>("strides"); + std::vector paddings = context.Attr>("paddings"); + std::vector dilations = context.Attr>("dilations"); + for (auto v : dilations) { + PADDLE_ENFORCE_EQ(v, 1); + } + + output->mutable_data(context.GetPlace()); + auto& dev_ctx = context.template device_context(); + math::SetConstant set_zero; + set_zero(dev_ctx, output, static_cast(0)); + + math::DepthwiseConvInputGradFunctor + depthwiseConvInputGrad; + depthwiseConvInputGrad(dev_ctx, *output, filter, *input, strides, paddings, + output); + } +}; + +template +class DepthwiseConvTransposeGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + const Tensor* input = context.Input("Input"); + const Tensor* output_grad = + context.Input(framework::GradVarName("Output")); + Tensor* input_grad = + context.Output(framework::GradVarName("Input")); + Tensor* filter_grad = + context.Output(framework::GradVarName("Filter")); + Tensor filter = *context.Input("Filter"); + + if (!input_grad && !filter_grad) return; + + auto& dev_ctx = context.template device_context(); + std::vector strides = context.Attr>("strides"); + std::vector paddings = context.Attr>("paddings"); + + if (input_grad) { + math::DepthwiseConvFunctor depthwiseConv; + depthwiseConv(dev_ctx, *output_grad, filter, strides, paddings, + input_grad); + } + + if (filter_grad) { + math::SetConstant set_zero; + filter_grad->mutable_data(context.GetPlace()); + set_zero(dev_ctx, filter_grad, static_cast(0)); + + math::DepthwiseConvFilterGradFunctor + depthwiseConvFilterGrad; + depthwiseConvFilterGrad(dev_ctx, *output_grad, *input, strides, paddings, + filter_grad); + } + } +}; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index 046dd11910..8f3644039f 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -76,9 +76,9 @@ class CosSimOpMaker : public framework::OpProtoAndCheckerMaker { .AsIntermediate(); AddComment(R"DOC( -Cosine Similarity Operator. +**Cosine Similarity Operator** -$Out = X^T * Y / (\sqrt{X^T * X} * \sqrt{Y^T * Y})$ +$Out = \frac{X^T * Y}{(\sqrt{X^T * X} * \sqrt{Y^T * Y})}$ The input X and Y must have the same shape, except that the 1st dimension of input Y could be just 1 (different from input X), which will be diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index 40f43936db..c27befe114 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -53,21 +53,18 @@ sequence of observed tags. The output of this operator changes according to whether Input(Label) is given: 1. Input(Label) is given: - -This happens in training. This operator is used to co-work with the chunk_eval -operator. - -When Input(Label) is given, the crf_decoding operator returns a row vector -with shape [N x 1] whose values are fixed to be 0, indicating an incorrect -prediction, or 1 indicating a tag is correctly predicted. Such an output is the -input to chunk_eval operator. + This happens in training. This operator is used to co-work with the chunk_eval + operator. + When Input(Label) is given, the crf_decoding operator returns a row vector + with shape [N x 1] whose values are fixed to be 0, indicating an incorrect + prediction, or 1 indicating a tag is correctly predicted. Such an output is the + input to chunk_eval operator. 2. Input(Label) is not given: - -This is the standard decoding process. + This is the standard decoding process. The crf_decoding operator returns a row vector with shape [N x 1] whose values -range from 0 to maximum tag number - 1. Each element indicates an index of a +range from 0 to maximum tag number - 1, Each element indicates an index of a predicted tag. )DOC"); } diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index 91cfbbda73..772e80bbea 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -52,7 +52,7 @@ static std::vector GetOffsets(const framework::ExecutionContext& ctx) { } else { res = ctx.Attr>("offsets"); PADDLE_ENFORCE_EQ( - rank, res.size(), + rank, static_cast(res.size()), "Offsets size should be equal to dimension size of input tensor."); } return res; diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index d5e095f9ca..a3bec3da45 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -124,8 +124,7 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker { "Tensor with shape [N x D]."); AddOutput("Y", "(Tensor, default Tensor), a 2-D tensor with shape " - "[N x 1]. The cross entropy loss.") - .Reuse("X"); + "[N x 1]. The cross entropy loss."); AddAttr("soft_label", "(bool, default false), a flag indicating whether to " "interpretate the given labels as soft labels.") diff --git a/paddle/fluid/operators/cumsum_op.cc b/paddle/fluid/operators/cumsum_op.cc index 92bb835e8f..5302b822d6 100644 --- a/paddle/fluid/operators/cumsum_op.cc +++ b/paddle/fluid/operators/cumsum_op.cc @@ -30,19 +30,19 @@ class CumOp : public framework::OperatorWithKernel { class CumsumOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "Input of Cumsum operator"); - AddOutput("Out", "Output of Cumsum operator"); + AddInput("X", "Input of cumsum operator"); + AddOutput("Out", "Output of cumsum operator"); AddAttr("axis", - "(int, default -1). The dimenstion to accumulate along. " - "-1 means the last dimenstion") + "The dimenstion to accumulate along. -1 means the last " + "dimenstion [default -1].") .SetDefault(-1) .EqualGreaterThan(-1); AddAttr("exclusive", - "bool, default false). Whether to perform exclusive cumsum") + "Whether to perform exclusive cumsum. [default false].") .SetDefault(false); AddAttr("reverse", - "bool, default false). If true, the cumsum is performed in " - "the reversed direction") + "If true, the cumsum is performed in the reversed direction. " + "[default false].") .SetDefault(false); AddComment(R"DOC( The cumulative sum of the elements along a given axis. diff --git a/paddle/fluid/operators/detail/macros.h b/paddle/fluid/operators/detail/macros.h index da1de72dad..6f4a15caa5 100644 --- a/paddle/fluid/operators/detail/macros.h +++ b/paddle/fluid/operators/detail/macros.h @@ -14,14 +14,22 @@ #pragma once +#ifdef PADDLE_WITH_DISTRIBUTE + #ifdef PADDLE_WITH_GRPC -#include "paddle/fluid/operators/detail/grpc_client.h" -#include "paddle/fluid/operators/detail/grpc_server.h" -#define RPCSERVER_T detail::AsyncGRPCServer -#define RPCCLIENT_T detail::GRPCClient -#else -#include "paddle/fluid/operators/detail/brpc_client.h" -#include "paddle/fluid/operators/detail/brpc_server.h" -#define RPCSERVER_T detail::AsyncBRPCServer -#define RPCCLIENT_T detail::BRPCClient -#endif + +#include "paddle/fluid/operators/distributed/grpc_client.h" +#include "paddle/fluid/operators/distributed/grpc_server.h" +#define RPCSERVER_T paddle::operators::distributed::AsyncGRPCServer +#define RPCCLIENT_T paddle::operators::distributed::GRPCClient + +#else // PADDLE_WITH_GRPC + +#include "paddle/fluid/operators/distributed/brpc_client.h" +#include "paddle/fluid/operators/distributed/brpc_server.h" +#define RPCSERVER_T paddle::operators::distributed::AsyncBRPCServer +#define RPCCLIENT_T paddle::operators::distributed::BRPCClient + +#endif // PADDLE_WITH_GRPC + +#endif // PADDLE_WITH_DISTRIBUTE diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt index 20d960f9fe..6d296ff7bf 100644 --- a/paddle/fluid/operators/detection/CMakeLists.txt +++ b/paddle/fluid/operators/detection/CMakeLists.txt @@ -22,6 +22,8 @@ iou_similarity_op.cu) detection_library(mine_hard_examples_op SRCS mine_hard_examples_op.cc) detection_library(multiclass_nms_op SRCS multiclass_nms_op.cc) detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op.cu) +detection_library(anchor_generator_op SRCS anchor_generator_op.cc +anchor_generator_op.cu) detection_library(target_assign_op SRCS target_assign_op.cc target_assign_op.cu) detection_library(polygon_box_transform_op SRCS polygon_box_transform_op.cc diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cc b/paddle/fluid/operators/detection/anchor_generator_op.cc new file mode 100644 index 0000000000..0c0155a0a9 --- /dev/null +++ b/paddle/fluid/operators/detection/anchor_generator_op.cc @@ -0,0 +1,154 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/detection/anchor_generator_op.h" + +namespace paddle { +namespace operators { + +class AnchorGeneratorOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Input"), + "Input(Input) of AnchorGeneratorOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Anchors"), + "Output(Anchors) of AnchorGeneratorOp should not be null."); + PADDLE_ENFORCE( + ctx->HasOutput("Variances"), + "Output(Variances) of AnchorGeneratorOp should not be null."); + + auto input_dims = ctx->GetInputDim("Input"); + PADDLE_ENFORCE(input_dims.size() == 4, "The layout of input is NCHW."); + + auto anchor_sizes = ctx->Attrs().Get>("anchor_sizes"); + auto aspect_ratios = ctx->Attrs().Get>("aspect_ratios"); + auto stride = ctx->Attrs().Get>("stride"); + auto variances = ctx->Attrs().Get>("variances"); + + size_t num_anchors = aspect_ratios.size() * anchor_sizes.size(); + + std::vector dim_vec(4); + dim_vec[0] = input_dims[2]; + dim_vec[1] = input_dims[3]; + dim_vec[2] = num_anchors; + dim_vec[3] = 4; + ctx->SetOutputDim("Anchors", framework::make_ddim(dim_vec)); + ctx->SetOutputDim("Variances", framework::make_ddim(dim_vec)); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("Input")->type()), + ctx.device_context()); + } +}; + +class AnchorGeneratorOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Input", + "(Tensor, default Tensor), " + "the input feature is a tensor with a rank of 4. " + "The layout is NCHW."); + AddOutput("Anchors", + "(Tensor, default Tensor), the output is a " + "tensor with a rank of 4. The layout is [H, W, num_anchors, 4]. " + "H is the height of input, W is the width of input, num_anchors " + "is the box count of each position. " + "Each anchor is in (xmin, ymin, xmax, ymax) format"); + AddOutput("Variances", + "(Tensor, default Tensor), the expanded variances for " + "normalizing bbox regression targets. The layout is [H, W, " + "num_anchors, 4]. " + "H is the height of input, W is the width of input, num_anchors " + "is the box count of each position. " + "Each variance is in (xcenter, ycenter, w, h) format"); + + AddAttr>( + "anchor_sizes", + "(vector) List of Region Proposal Network(RPN) anchor sizes " + " given in absolute pixels e.g. (64, 128, 256, 512)." + " For instance, the anchor size of 64 means the area of this anchor " + "equals to 64**2.") + .AddCustomChecker([](const std::vector& anchor_sizes) { + PADDLE_ENFORCE_GT(anchor_sizes.size(), 0, + "Size of anchor_sizes must be at least 1."); + for (size_t i = 0; i < anchor_sizes.size(); ++i) { + PADDLE_ENFORCE_GT(anchor_sizes[i], 0.0, + "anchor_sizes[%d] must be positive.", i); + } + }); + AddAttr>( + "aspect_ratios", + "(vector) List of Region Proposal Network(RPN) anchor aspect " + "ratios, e.g. (0.5, 1, 2)." + "For instacne, the aspect ratio of 0.5 means the height / width of " + "this anchor equals 0.5."); + + AddAttr>("variances", + "(vector) List of variances to be used " + "in box regression deltas") + .AddCustomChecker([](const std::vector& variances) { + PADDLE_ENFORCE_EQ(variances.size(), 4, + "Must and only provide 4 variance."); + for (size_t i = 0; i < variances.size(); ++i) { + PADDLE_ENFORCE_GT(variances[i], 0.0, + "variance[%d] must be greater than 0.", i); + } + }); + + AddAttr>("stride", + "Anchors stride across width and height, " + "with a default of (16, 16)") + .SetDefault(std::vector(2, 16.0)) + .AddCustomChecker([](const std::vector& stride) { + PADDLE_ENFORCE_EQ( + stride.size(), 2, + "Must and only provide 2 stride for width and height."); + for (size_t i = 0; i < stride.size(); ++i) { + PADDLE_ENFORCE_GT(stride[i], 0.0, + "stride[%d] should be larger than 0.", i); + } + }); + + AddAttr("offset", + "(float) " + "Anchor center offset, with a default of 0.5") + .SetDefault(0.5); + AddComment(R"DOC( +AnchorGenerator operator +Generates anchors for Faster RCNN, FPN etc. algorithm. +Each position of the input produce N anchors, N = + size(anchor_sizes) * size(aspect_ratios). + +Please get more information from the following papers: +https://arxiv.org/abs/1506.01497. +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OPERATOR(anchor_generator, ops::AnchorGeneratorOp, + ops::AnchorGeneratorOpMaker, + paddle::framework::EmptyGradOpMaker); + +REGISTER_OP_CPU_KERNEL(anchor_generator, ops::AnchorGeneratorOpKernel, + ops::AnchorGeneratorOpKernel); diff --git a/paddle/fluid/operators/detection/anchor_generator_op.cu b/paddle/fluid/operators/detection/anchor_generator_op.cu new file mode 100644 index 0000000000..3cc9bbeee1 --- /dev/null +++ b/paddle/fluid/operators/detection/anchor_generator_op.cu @@ -0,0 +1,132 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/detection/anchor_generator_op.h" + +namespace paddle { +namespace operators { + +template +__global__ void GenAnchors(T* out, const T* aspect_ratios, const int ar_num, + const T* anchor_sizes, const int as_num, + const T* stride, const int sd_num, const int height, + const int width, const T offset) { + int num_anchors = as_num * ar_num; + int box_num = height * width * num_anchors; + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < box_num; + i += blockDim.x * gridDim.x) { + int h_idx = i / (num_anchors * width); + int w_idx = (i / num_anchors) % width; + T stride_width = stride[0]; + T stride_height = stride[1]; + T x_ctr = (w_idx * stride_width) + offset * (stride_width - 1); + T y_ctr = (h_idx * stride_height) + offset * (stride_height - 1); + T area, area_ratios; + T base_w, base_h; + T scale_w, scale_h; + T anchor_width, anchor_height; + int anch_idx = i % num_anchors; + int ar_idx = anch_idx / as_num; + int as_idx = anch_idx % as_num; + T aspect_ratio = aspect_ratios[ar_idx]; + T anchor_size = anchor_sizes[as_idx]; + area = stride_width * stride_height; + area_ratios = area / aspect_ratio; + base_w = round(sqrt(area_ratios)); + base_h = round(base_w * aspect_ratio); + scale_w = anchor_size / stride_width; + scale_h = anchor_size / stride_height; + anchor_width = scale_w * base_w; + anchor_height = scale_h * base_h; + + T xmin = (x_ctr - 0.5 * (anchor_width - 1)); + T ymin = (y_ctr - 0.5 * (anchor_height - 1)); + T xmax = (x_ctr + 0.5 * (anchor_width - 1)); + T ymax = (y_ctr + 0.5 * (anchor_height - 1)); + out[i * 4] = xmin; + out[i * 4 + 1] = ymin; + out[i * 4 + 2] = xmax; + out[i * 4 + 3] = ymax; + } +} + +template +__global__ void SetVariance(T* out, const T* var, const int vnum, + const int num) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num; + i += blockDim.x * gridDim.x) { + out[i] = var[i % vnum]; + } +} + +template +class AnchorGeneratorOpCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("Input"); + auto* anchors = ctx.Output("Anchors"); + auto* vars = ctx.Output("Variances"); + + auto anchor_sizes = ctx.Attr>("anchor_sizes"); + auto aspect_ratios = ctx.Attr>("aspect_ratios"); + auto stride = ctx.Attr>("stride"); + auto variances = ctx.Attr>("variances"); + + T offset = static_cast(ctx.Attr("offset")); + + auto width = input->dims()[3]; + auto height = input->dims()[2]; + + int num_anchors = aspect_ratios.size() * anchor_sizes.size(); + + int box_num = width * height * num_anchors; + + int block = 512; + int grid = (box_num + block - 1) / block; + + auto stream = + ctx.template device_context().stream(); + + anchors->mutable_data(ctx.GetPlace()); + vars->mutable_data(ctx.GetPlace()); + + framework::Tensor ar; + framework::TensorFromVector(aspect_ratios, ctx.device_context(), &ar); + + framework::Tensor as; + framework::TensorFromVector(anchor_sizes, ctx.device_context(), &as); + + framework::Tensor sd; + framework::TensorFromVector(stride, ctx.device_context(), &sd); + + GenAnchors<<>>( + anchors->data(), ar.data(), aspect_ratios.size(), as.data(), + anchor_sizes.size(), sd.data(), stride.size(), height, width, + offset); + + framework::Tensor v; + framework::TensorFromVector(variances, ctx.device_context(), &v); + grid = (box_num * 4 + block - 1) / block; + SetVariance<<>>(vars->data(), v.data(), + variances.size(), box_num * 4); + } +}; // namespace operators + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(anchor_generator, + ops::AnchorGeneratorOpCUDAKernel, + ops::AnchorGeneratorOpCUDAKernel); diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h new file mode 100644 index 0000000000..e0e499d76a --- /dev/null +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -0,0 +1,109 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/platform/transform.h" + +namespace paddle { +namespace operators { + +template +class AnchorGeneratorOpKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto* input = ctx.Input("Input"); + auto* anchors = ctx.Output("Anchors"); + auto* vars = ctx.Output("Variances"); + + auto anchor_sizes = ctx.Attr>("anchor_sizes"); + auto aspect_ratios = ctx.Attr>("aspect_ratios"); + auto stride = ctx.Attr>("stride"); + auto variances = ctx.Attr>("variances"); + + T offset = static_cast(ctx.Attr("offset")); + + auto feature_width = input->dims()[3]; + auto feature_height = input->dims()[2]; + + T stride_width, stride_height; + stride_width = stride[0]; + stride_height = stride[1]; + + int num_anchors = aspect_ratios.size() * anchor_sizes.size(); + + anchors->mutable_data(ctx.GetPlace()); + vars->mutable_data(ctx.GetPlace()); + + auto e_anchors = framework::EigenTensor::From(*anchors); + for (int h_idx = 0; h_idx < feature_height; ++h_idx) { + for (int w_idx = 0; w_idx < feature_width; ++w_idx) { + T x_ctr = (w_idx * stride_width) + offset * (stride_width - 1); + T y_ctr = (h_idx * stride_height) + offset * (stride_height - 1); + T area, area_ratios; + T base_w, base_h; + T scale_w, scale_h; + T anchor_width, anchor_height; + int idx = 0; + for (size_t r = 0; r < aspect_ratios.size(); ++r) { + auto ar = aspect_ratios[r]; + for (size_t s = 0; s < anchor_sizes.size(); ++s) { + auto anchor_size = anchor_sizes[s]; + area = stride_width * stride_height; + area_ratios = area / ar; + base_w = round(sqrt(area_ratios)); + base_h = round(base_w * ar); + scale_w = anchor_size / stride_width; + scale_h = anchor_size / stride_height; + anchor_width = scale_w * base_w; + anchor_height = scale_h * base_h; + e_anchors(h_idx, w_idx, idx, 0) = + (x_ctr - 0.5 * (anchor_width - 1)); + e_anchors(h_idx, w_idx, idx, 1) = + (y_ctr - 0.5 * (anchor_height - 1)); + e_anchors(h_idx, w_idx, idx, 2) = + (x_ctr + 0.5 * (anchor_width - 1)); + e_anchors(h_idx, w_idx, idx, 3) = + (y_ctr + 0.5 * (anchor_height - 1)); + idx++; + } + } + } + } + + framework::Tensor var_t; + var_t.mutable_data( + framework::make_ddim({1, static_cast(variances.size())}), + ctx.GetPlace()); + auto var_et = framework::EigenTensor::From(var_t); + for (size_t i = 0; i < variances.size(); ++i) { + var_et(0, i) = variances[i]; + } + + int anchor_num = feature_height * feature_width * num_anchors; + auto var_dim = vars->dims(); + vars->Resize({anchor_num, static_cast(variances.size())}); + + auto e_vars = framework::EigenMatrix::From(*vars); + e_vars = var_et.broadcast(Eigen::DSizes(anchor_num, 1)); + + vars->Resize(var_dim); + } +}; // namespace operators + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/detection/bipartite_match_op.cc b/paddle/fluid/operators/detection/bipartite_match_op.cc index d437ad5c19..c23b65fe4d 100644 --- a/paddle/fluid/operators/detection/bipartite_match_op.cc +++ b/paddle/fluid/operators/detection/bipartite_match_op.cc @@ -51,6 +51,12 @@ class BipartiteMatchOp : public framework::OperatorWithKernel { } }; +template +bool DistPairDescend(std::tuple pair1, + std::tuple pair2) { + return std::get<2>(pair1) > std::get<2>(pair2); +} + template class BipartiteMatchKernel : public framework::OpKernel { public: @@ -58,46 +64,76 @@ class BipartiteMatchKernel : public framework::OpKernel { // The match_dist must be initialized to 0 at first. void BipartiteMatch(const Tensor& dist, int* match_indices, T* match_dist) const { - constexpr T kEPS = static_cast(1e-6); PADDLE_ENFORCE_EQ(dist.dims().size(), 2, "The rank of dist must be 2."); int64_t row = dist.dims()[0]; int64_t col = dist.dims()[1]; auto* dist_data = dist.data(); - std::vector row_pool; - for (int i = 0; i < row; ++i) { - row_pool.push_back(i); - } - while (row_pool.size() > 0) { - int max_idx = -1; - int max_row_idx = -1; - T max_dist = -1; - for (int64_t j = 0; j < col; ++j) { - if (match_indices[j] != -1) { - continue; + // Test result: When row==130 the speed of these two methods almost the same + if (row >= 130) { + std::vector> match_pair; + + for (int64_t i = 0; i < row; ++i) { + for (int64_t j = 0; j < col; ++j) { + match_pair.push_back(std::make_tuple(i, j, dist_data[i * col + j])); } - for (size_t k = 0; k < row_pool.size(); ++k) { - int m = row_pool[k]; - // distance is 0 between m-th row and j-th column - if (dist_data[m * col + j] < kEPS) { + } + std::sort(match_pair.begin(), match_pair.end(), DistPairDescend); + std::vector row_indices(row, -1); + + int64_t idx = 0; + for (int64_t k = 0; k < row * col; ++k) { + int64_t i = std::get<0>(match_pair[k]); + int64_t j = std::get<1>(match_pair[k]); + T dist = std::get<2>(match_pair[k]); + + if (idx >= row) { + break; + } + if (match_indices[j] == -1 && row_indices[i] == -1 && dist > 0) { + match_indices[j] = i; + row_indices[i] = j; + match_dist[j] = dist; + idx += 1; + } + } + } else { + constexpr T kEPS = static_cast(1e-6); + std::vector row_pool; + for (int i = 0; i < row; ++i) { + row_pool.push_back(i); + } + while (row_pool.size() > 0) { + int max_idx = -1; + int max_row_idx = -1; + T max_dist = -1; + for (int64_t j = 0; j < col; ++j) { + if (match_indices[j] != -1) { continue; } - if (dist_data[m * col + j] > max_dist) { - max_idx = j; - max_row_idx = m; - max_dist = dist_data[m * col + j]; + for (size_t k = 0; k < row_pool.size(); ++k) { + int m = row_pool[k]; + // distance is 0 between m-th row and j-th column + if (dist_data[m * col + j] < kEPS) { + continue; + } + if (dist_data[m * col + j] > max_dist) { + max_idx = j; + max_row_idx = m; + max_dist = dist_data[m * col + j]; + } } } - } - if (max_idx == -1) { - // Cannot find good match. - break; - } else { - PADDLE_ENFORCE_EQ(match_indices[max_idx], -1); - match_indices[max_idx] = max_row_idx; - match_dist[max_idx] = max_dist; - // Erase the row index. - row_pool.erase( - std::find(row_pool.begin(), row_pool.end(), max_row_idx)); + if (max_idx == -1) { + // Cannot find good match. + break; + } else { + PADDLE_ENFORCE_EQ(match_indices[max_idx], -1); + match_indices[max_idx] = max_row_idx; + match_dist[max_idx] = max_dist; + // Erase the row index. + row_pool.erase( + std::find(row_pool.begin(), row_pool.end(), max_row_idx)); + } } } } diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc index 8c4b4321b7..d0f95f727f 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cc +++ b/paddle/fluid/operators/detection/box_coder_op.cc @@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker { "and M represents the number of deocded boxes."); AddComment(R"DOC( -Bounding Box Coder Operator. + +Bounding Box Coder. + Encode/Decode the target bounding box with the priorbox information. + The Encoding schema described below: -ox = (tx - px) / pw / pxv -oy = (ty - py) / ph / pyv -ow = log(abs(tw / pw)) / pwv -oh = log(abs(th / ph)) / phv + + ox = (tx - px) / pw / pxv + + oy = (ty - py) / ph / pyv + + ow = log(abs(tw / pw)) / pwv + + oh = log(abs(th / ph)) / phv + The Decoding schema described below: -ox = (pw * pxv * tx * + px) - tw / 2 -oy = (ph * pyv * ty * + py) - th / 2 -ow = exp(pwv * tw) * pw + tw / 2 -oh = exp(phv * th) * ph + th / 2 -where tx, ty, tw, th denote the target box's center coordinates, width and -height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor) -center coordinates, width and height. pxv, pyv, pwv, phv denote the variance -of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates, -width and height. + + ox = (pw * pxv * tx * + px) - tw / 2 + + oy = (ph * pyv * ty * + py) - th / 2 + + ow = exp(pwv * tw) * pw + tw / 2 + + oh = exp(phv * th) * ph + th / 2 + +where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width +and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the +priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`, +`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the +encoded/decoded coordinates, width and height. )DOC"); } }; diff --git a/paddle/fluid/operators/detection/iou_similarity_op.cc b/paddle/fluid/operators/detection/iou_similarity_op.cc index 8e58605fce..9c89b7ca9a 100644 --- a/paddle/fluid/operators/detection/iou_similarity_op.cc +++ b/paddle/fluid/operators/detection/iou_similarity_op.cc @@ -68,15 +68,16 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker { "representing pairwise iou scores."); AddComment(R"DOC( -IOU Similarity Operator. +**IOU Similarity Operator** + Computes intersection-over-union (IOU) between two box lists. - Box list 'X' should be a LoDTensor and 'Y' is a common Tensor, - boxes in 'Y' are shared by all instance of the batched inputs of X. - Given two boxes A and B, the calculation of IOU is as follows: +Box list 'X' should be a LoDTensor and 'Y' is a common Tensor, +boxes in 'Y' are shared by all instance of the batched inputs of X. +Given two boxes A and B, the calculation of IOU is as follows: $$ IOU(A, B) = -\frac{area(A\cap B)}{area(A)+area(B)-area(A\cap B)} +\\frac{area(A\\cap B)}{area(A)+area(B)-area(A\\cap B)} $$ )DOC"); diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index 335e8dd470..568d50d457 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -83,11 +83,13 @@ class PolygonBoxTransformOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( PolygonBoxTransform Operator. + +PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate. + The input is the final geometry output in detection network. We use 2*n numbers to denote the coordinate shift from n corner vertices of the polygon_box to the pixel location. As each distance offset contains two numbers (xi, yi), the geometry output contains 2*n channels. -PolygonBoxTransform Operator is used to transform the coordinate shift to the real coordinate. )DOC"); } }; diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 716c8625d3..d7f49a9590 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( Detection mAP evaluate operator. The general steps are as follows. First, calculate the true positive and - false positive according to the input of detection and labels, then - calculate the mAP evaluate value. - Supporting '11 point' and 'integral' mAP algorithm. Please get more information - from the following articles: - https://sanchom.wordpress.com/tag/average-precision/ - https://arxiv.org/abs/1512.02325 +false positive according to the input of detection and labels, then +calculate the mAP evaluate value. +Supporting '11 point' and 'integral' mAP algorithm. Please get more information +from the following articles: +https://sanchom.wordpress.com/tag/average-precision/ +https://arxiv.org/abs/1512.02325 )DOC"); } diff --git a/paddle/fluid/operators/detail/CMakeLists.txt b/paddle/fluid/operators/distributed/CMakeLists.txt similarity index 94% rename from paddle/fluid/operators/detail/CMakeLists.txt rename to paddle/fluid/operators/distributed/CMakeLists.txt index abc5aad043..675ca36774 100644 --- a/paddle/fluid/operators/detail/CMakeLists.txt +++ b/paddle/fluid/operators/distributed/CMakeLists.txt @@ -1,8 +1,3 @@ -if(NOT WITH_DISTRIBUTE) - return() -endif() - - if(WITH_GRPC) grpc_library(sendrecvop_grpc SRCS bytebuffer_stream.cc sendrecvop_utils.cc grpc_client.cc request_handler_impl.cc rpc_client.cc rpc_server.cc grpc_server.cc variable_response.cc PROTO send_recv.proto DEPS lod_tensor @@ -10,7 +5,7 @@ if(WITH_GRPC) set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") set_source_files_properties(grpc_serde_test.cc rpc_server_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS}) cc_test(serde_test SRCS grpc_serde_test.cc variable_response.cc DEPS grpc++_unsecure grpc_unsecure gpr - cares zlib protobuf sendrecvop_grpc SERIAL) + cares zlib protobuf sendrecvop_grpc scope profiler math_function SERIAL) cc_test(grpc_server_test SRCS rpc_server_test.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op SERIAL) diff --git a/paddle/fluid/operators/detail/brpc_client.cc b/paddle/fluid/operators/distributed/brpc_client.cc similarity index 98% rename from paddle/fluid/operators/detail/brpc_client.cc rename to paddle/fluid/operators/distributed/brpc_client.cc index 9a4e410f1d..b394c678fb 100644 --- a/paddle/fluid/operators/detail/brpc_client.cc +++ b/paddle/fluid/operators/distributed/brpc_client.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/detail/brpc_client.h" +#include "paddle/fluid/operators/distributed/brpc_client.h" #include "paddle/fluid/framework/threadpool.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { DEFINE_int32(brpc_channel_num, 24, "Number of channels to send requests connected to one server"); @@ -175,6 +175,6 @@ ChannelQueuePtr BRPCClient::GetChannel(const std::string& ep) { return q; } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/brpc_client.h b/paddle/fluid/operators/distributed/brpc_client.h similarity index 80% rename from paddle/fluid/operators/detail/brpc_client.h rename to paddle/fluid/operators/distributed/brpc_client.h index 1e953ea431..8ff1f0a607 100644 --- a/paddle/fluid/operators/detail/brpc_client.h +++ b/paddle/fluid/operators/distributed/brpc_client.h @@ -31,13 +31,13 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/operators/detail/rpc_client.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN namespace paddle { namespace operators { -namespace detail { +namespace distributed { struct ChannelContext { brpc::Channel channel; @@ -55,26 +55,24 @@ class BRPCClient : public RPCClient { bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendBatchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendBatchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendFetchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendFetchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; void Wait() override; @@ -95,6 +93,6 @@ class BRPCClient : public RPCClient { DISABLE_COPY_AND_ASSIGN(BRPCClient); }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/brpc_server.cc b/paddle/fluid/operators/distributed/brpc_server.cc similarity index 86% rename from paddle/fluid/operators/detail/brpc_server.cc rename to paddle/fluid/operators/distributed/brpc_server.cc index 2170abe679..862167f020 100644 --- a/paddle/fluid/operators/detail/brpc_server.cc +++ b/paddle/fluid/operators/distributed/brpc_server.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/detail/brpc_server.h" -#include "paddle/fluid/operators/detail/request_handler.h" +#include "paddle/fluid/operators/distributed/brpc_server.h" +#include "paddle/fluid/operators/distributed/request_handler.h" namespace sendrecv { typedef std::unordered_map + paddle::operators::distributed::RequestHandler*> HandlerMap; class BRPCServiceImpl : public SendRecvService { @@ -27,17 +27,17 @@ class BRPCServiceImpl : public SendRecvService { : request_send_h_(nullptr), request_get_h_(nullptr), request_prefetch_h_(nullptr) { - auto it = rpc_call_map.find(paddle::operators::detail::kRequestSend); + auto it = rpc_call_map.find(paddle::operators::distributed::kRequestSend); if (it != rpc_call_map.end()) { request_send_h_ = it->second; } - it = rpc_call_map.find(paddle::operators::detail::kRequestSend); + it = rpc_call_map.find(paddle::operators::distributed::kRequestSend); if (it != rpc_call_map.end()) { request_get_h_ = it->second; } - it = rpc_call_map.find(paddle::operators::detail::kRequestPrefetch); + it = rpc_call_map.find(paddle::operators::distributed::kRequestPrefetch); if (it != rpc_call_map.end()) { request_prefetch_h_ = it->second; } @@ -88,15 +88,15 @@ class BRPCServiceImpl : public SendRecvService { } private: - paddle::operators::detail::RequestHandler* request_send_h_; - paddle::operators::detail::RequestHandler* request_get_h_; - paddle::operators::detail::RequestHandler* request_prefetch_h_; + paddle::operators::distributed::RequestHandler* request_send_h_; + paddle::operators::distributed::RequestHandler* request_get_h_; + paddle::operators::distributed::RequestHandler* request_prefetch_h_; }; } // namespace sendrecv namespace paddle { namespace operators { -namespace detail { +namespace distributed { void AsyncBRPCServer::StartServer() { // Instance of your service. @@ -139,6 +139,6 @@ void AsyncBRPCServer::WaitServerReady() { VLOG(3) << "AsyncGRPCServer WaitSeverReady"; } -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/detail/brpc_server.h b/paddle/fluid/operators/distributed/brpc_server.h similarity index 88% rename from paddle/fluid/operators/detail/brpc_server.h rename to paddle/fluid/operators/distributed/brpc_server.h index 0105c8074a..85a7ad0dfe 100644 --- a/paddle/fluid/operators/detail/brpc_server.h +++ b/paddle/fluid/operators/distributed/brpc_server.h @@ -19,12 +19,12 @@ limitations under the License. */ #include #include "brpc/server.h" -#include "paddle/fluid/operators/detail/rpc_server.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/rpc_server.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { class AsyncBRPCServer final : public RPCServer { public: @@ -48,6 +48,6 @@ class AsyncBRPCServer final : public RPCServer { int ready_; }; -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/detail/bytebuffer_stream.cc b/paddle/fluid/operators/distributed/bytebuffer_stream.cc similarity index 94% rename from paddle/fluid/operators/detail/bytebuffer_stream.cc rename to paddle/fluid/operators/distributed/bytebuffer_stream.cc index a14171563e..6e91b447db 100644 --- a/paddle/fluid/operators/detail/bytebuffer_stream.cc +++ b/paddle/fluid/operators/distributed/bytebuffer_stream.cc @@ -17,11 +17,11 @@ limitations under the License. */ // file and did some modifications so that we can send gRPC // requests without too much copying of the tensor data. -#include "paddle/fluid/operators/detail/bytebuffer_stream.h" +#include "paddle/fluid/operators/distributed/bytebuffer_stream.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { GrpcByteBufferSource::GrpcByteBufferSource() {} @@ -83,6 +83,6 @@ google::protobuf::int64 GrpcByteBufferSource::ByteCount() const { return byte_count_; } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/bytebuffer_stream.h b/paddle/fluid/operators/distributed/bytebuffer_stream.h similarity index 99% rename from paddle/fluid/operators/detail/bytebuffer_stream.h rename to paddle/fluid/operators/distributed/bytebuffer_stream.h index 054dd4ff29..e7de172c79 100644 --- a/paddle/fluid/operators/detail/bytebuffer_stream.h +++ b/paddle/fluid/operators/distributed/bytebuffer_stream.h @@ -106,7 +106,7 @@ class GrpcBufferReader final namespace paddle { namespace operators { -namespace detail { +namespace distributed { // Source provides a way for a particular RPC implementation to provide // received data to ParseFrom. class Source { @@ -183,6 +183,6 @@ class GrpcByteSource : public Source { char space_[sizeof(Reader)]; }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/grpc_client.cc b/paddle/fluid/operators/distributed/grpc_client.cc similarity index 79% rename from paddle/fluid/operators/detail/grpc_client.cc rename to paddle/fluid/operators/distributed/grpc_client.cc index 02ffe3651e..4a09f3870d 100644 --- a/paddle/fluid/operators/detail/grpc_client.cc +++ b/paddle/fluid/operators/distributed/grpc_client.cc @@ -12,19 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/detail/grpc_client.h" +#include "paddle/fluid/operators/distributed/grpc_client.h" #include #include +#include "glog/logging.h" // For VLOG #include "paddle/fluid/framework/threadpool.h" -#include "paddle/fluid/operators/detail/request_handler.h" +#include "paddle/fluid/operators/distributed/request_handler.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { void GRPCClient::InitImpl() { InitEventLoop(); } @@ -34,10 +35,20 @@ void GRPCClient::InitEventLoop() { client_thread_.reset(new std::thread(std::bind(&GRPCClient::Proceed, this))); } -void GRPCClient::SendComplete() { +void GRPCClient::SendBeginPass() { for (auto& it : channels_) { - this->AsyncSendComplete(it.first); + VLOG(3) << "send begin pass to: " << it.first; + this->AsyncSendBeginPass(it.first); } + this->Wait(); +} + +void GRPCClient::SendEndPass() { + for (auto& it : channels_) { + VLOG(3) << "send end pass to " << it.first; + this->AsyncSendEndPass(it.first); + } + this->Wait(); } GRPCClient::~GRPCClient() { @@ -75,6 +86,9 @@ bool GRPCClient::AsyncSendVar(const std::string& ep, var_h.scope = p_scope; var_h.name = var_name_val; var_h.ctx = p_ctx; + var_h.method = "Send"; + + VLOG(3) << var_h.String() << " begin"; // stub context SendProcessor* s = new SendProcessor(ch); @@ -129,6 +143,9 @@ bool GRPCClient::AsyncGetVar(const std::string& ep, var_h.scope = p_scope; var_h.name = var_name_val; var_h.ctx = p_ctx; + var_h.method = "Get"; + + VLOG(3) << var_h.String() << " begin"; // stub context GetProcessor* s = new GetProcessor(ch); @@ -172,6 +189,9 @@ bool GRPCClient::AsyncPrefetchVar(const std::string& ep, var_h.scope = p_scope; var_h.name = out_var_name_val; var_h.ctx = p_ctx; + var_h.method = "Prefetch"; + + VLOG(3) << var_h.String() << " begin"; // stub context GetProcessor* s = new GetProcessor(ch); @@ -216,19 +236,49 @@ void GRPCClient::AsyncSendFetchBarrier(const std::string& ep, req_count_++; } -void GRPCClient::AsyncSendComplete(const std::string& ep, int64_t time_out) { +void GRPCClient::AsyncSendBeginPass(const std::string& ep, int64_t time_out) { const auto ch = GetChannel(ep); BatchBarrierProcessor* s = new BatchBarrierProcessor(ch); s->Prepare(time_out); sendrecv::VariableMessage req; - req.set_varname(COMPLETE_MESSAGE); + req.set_varname(BEGIN_PASS_MESSAGE); auto rpc = s->stub_->AsyncSendVariable(s->context_.get(), req, &cq_); rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); req_count_++; } +void GRPCClient::AsyncSendEndPass(const std::string& ep, int64_t time_out) { + const auto ch = GetChannel(ep); + + FetchBarrierProcessor* s = new FetchBarrierProcessor(ch); + s->Prepare(time_out); + + sendrecv::VariableMessage req; + req.set_varname(END_PASS_MESSAGE); + auto rpc = s->stub_->AsyncGetVariable(s->context_.get(), req, &cq_); + rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); + req_count_++; +} + +void GRPCClient::AsyncCheckpointNotify(const std::string& ep, + const std::string& dir, + int64_t time_out) { + const auto ch = GetChannel(ep); + + CheckpointNotifyProcessor* s = new CheckpointNotifyProcessor(ch); + s->Prepare(time_out); + + sendrecv::VariableMessage req; + req.set_varname(CHECKPOINT_SAVE_MESSAGE); + req.set_out_varname(dir); + + auto rpc = s->stub_->AsyncCheckpointNotify(s->context_.get(), req, &cq_); + rpc->Finish(&s->reply_, &s->status_, reinterpret_cast(s)); + req_count_++; +} + void GRPCClient::Wait() { std::unique_lock lk(sync_mutex_); sync_cond_.wait(lk, [this] { return req_count_ == 0; }); @@ -243,10 +293,11 @@ void GRPCClient::Proceed() { GPR_ASSERT(ok); PADDLE_ENFORCE(c); if (c->status_.ok()) { + VLOG(3) << c->var_h_.String() << " process"; c->Process(); } else { - LOG(ERROR) << "var: " << c->var_h_.String() - << " grpc error:" << c->status_.error_message(); + LOG(FATAL) << c->var_h_.String() + << " meets grpc error:" << c->status_.error_message(); } delete c; { @@ -258,14 +309,15 @@ void GRPCClient::Proceed() { } std::shared_ptr GRPCClient::GetChannel(const std::string& ep) { - // TODO(Yancey1989): make grpc client completely thread-safe std::lock_guard guard(chan_mutex_); auto it = channels_.find(ep); if (it != channels_.end()) { return it->second; } + // Channel configurations: grpc::ChannelArguments args; + args.SetInt(GRPC_ARG_MAX_RECONNECT_BACKOFF_MS, 2000); args.SetCompressionAlgorithm(GRPC_COMPRESS_NONE); args.SetMaxSendMessageSize(std::numeric_limits::max()); args.SetMaxReceiveMessageSize(std::numeric_limits::max()); @@ -276,6 +328,6 @@ std::shared_ptr GRPCClient::GetChannel(const std::string& ep) { return ch; } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/grpc_client.h b/paddle/fluid/operators/distributed/grpc_client.h similarity index 74% rename from paddle/fluid/operators/detail/grpc_client.h rename to paddle/fluid/operators/distributed/grpc_client.h index 44000c028b..5dae20155e 100644 --- a/paddle/fluid/operators/detail/grpc_client.h +++ b/paddle/fluid/operators/distributed/grpc_client.h @@ -38,23 +38,27 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" -#include "paddle/fluid/operators/detail/rpc_client.h" -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN namespace paddle { namespace operators { -namespace detail { +namespace distributed { struct VarHandle { + // RPC endpoint. std::string ep; const platform::DeviceContext* ctx; const framework::Scope* scope; + // Variable name. std::string name; + // RPC method name. + std::string method; std::string String() const { std::ostringstream s; - s << "name:[" << name << "] ep:[" << ep << "]"; + s << method << " name:[" << name << "], ep:[" << ep << "]"; return s.str(); } }; @@ -72,15 +76,18 @@ class BaseProcessor { virtual void Prepare(const VarHandle& var_info, int64_t time_out) { context_.reset(new grpc::ClientContext()); var_h_ = var_info; - - std::chrono::system_clock::time_point deadline = - std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); - - context_->set_deadline(deadline); + context_->set_wait_for_ready(true); + if (time_out) { + std::chrono::system_clock::time_point deadline = + std::chrono::system_clock::now() + + std::chrono::milliseconds(time_out); + context_->set_deadline(deadline); + } } virtual void Prepare(int64_t time_out) { context_.reset(new grpc::ClientContext()); + context_->set_wait_for_ready(true); std::chrono::system_clock::time_point deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(time_out); @@ -165,6 +172,20 @@ class FetchBarrierProcessor : public BaseProcessor { std::unique_ptr stub_; }; +class CheckpointNotifyProcessor : public BaseProcessor { + public: + explicit CheckpointNotifyProcessor(std::shared_ptr ch) + : BaseProcessor(ch) { + stub_ = sendrecv::SendRecvService::NewStub(ch); + } + + virtual ~CheckpointNotifyProcessor() {} + + virtual void Process() {} + sendrecv::VoidMessage reply_; + std::unique_ptr stub_; +}; + class GRPCClient : public RPCClient { public: GRPCClient() {} @@ -172,30 +193,39 @@ class GRPCClient : public RPCClient { bool AsyncSendVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = RPCClient::rpc_time_out) override; + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendBatchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendBatchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; - void AsyncSendFetchBarrier( - const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out) override; + void AsyncSendFetchBarrier(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; + + void AsyncCheckpointNotify(const std::string& ep, const std::string& dir, + int64_t time_out = FLAGS_rpc_deadline) override; + + void AsyncSendBeginPass(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; + + void AsyncSendEndPass(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) override; void Wait() override; - void SendComplete() override; + void SendBeginPass() override; + + void SendEndPass() override; protected: void InitImpl() override; @@ -206,9 +236,6 @@ class GRPCClient : public RPCClient { void Proceed(); - void AsyncSendComplete(const std::string& ep, - int64_t time_out = RPCClient::rpc_time_out); - std::shared_ptr GetChannel(const std::string& ep); private: @@ -226,6 +253,6 @@ class GRPCClient : public RPCClient { DISABLE_COPY_AND_ASSIGN(GRPCClient); }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/grpc_serde_test.cc b/paddle/fluid/operators/distributed/grpc_serde_test.cc similarity index 93% rename from paddle/fluid/operators/detail/grpc_serde_test.cc rename to paddle/fluid/operators/distributed/grpc_serde_test.cc index 15892295e6..3d107b533b 100644 --- a/paddle/fluid/operators/detail/grpc_serde_test.cc +++ b/paddle/fluid/operators/distributed/grpc_serde_test.cc @@ -21,8 +21,8 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" -#include "paddle/fluid/operators/detail/variable_response.h" +#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" +#include "paddle/fluid/operators/distributed/variable_response.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" @@ -50,7 +50,7 @@ void RunSerdeTestSelectedRows(platform::Place place) { for (int i = 0; i < 564; ++i) rows->push_back(i); ::grpc::ByteBuffer msg; - operators::detail::SerializeToByteBuffer("myvar", &var, ctx, &msg); + operators::distributed::SerializeToByteBuffer("myvar", &var, ctx, &msg); EXPECT_GT(msg.Length(), static_cast(0)); // deserialize @@ -81,10 +81,10 @@ void RunSerdeTestSelectedRows(platform::Place place) { // deserialize zero-copy // framework::Variable var2; - // operators::detail::DeserializeFromByteBuffer(msg, ctx, &var2); + // operators::distributed::DeserializeFromByteBuffer(msg, ctx, &var2); framework::Scope scope; scope.Var("myvar"); - operators::detail::VariableResponse resp(&scope, &ctx); + operators::distributed::VariableResponse resp(&scope, &ctx); EXPECT_EQ(resp.Parse(msg), 0); framework::Variable* var2 = resp.GetVar(); @@ -128,7 +128,7 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) { math::set_constant(ctx, tensor, 31.9); ::grpc::ByteBuffer msg; - operators::detail::SerializeToByteBuffer("myvar", &var, ctx, &msg); + operators::distributed::SerializeToByteBuffer("myvar", &var, ctx, &msg); EXPECT_GT(msg.Length(), static_cast(0)); // deserialize @@ -171,7 +171,7 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) { // deserialize zero-copy framework::Scope scope; scope.Var("myvar"); - operators::detail::VariableResponse resp(&scope, &ctx); + operators::distributed::VariableResponse resp(&scope, &ctx); if (from_type == 0) { EXPECT_EQ(resp.Parse(msg), 0); } else { diff --git a/paddle/fluid/operators/detail/grpc_server.cc b/paddle/fluid/operators/distributed/grpc_server.cc similarity index 75% rename from paddle/fluid/operators/detail/grpc_server.cc rename to paddle/fluid/operators/distributed/grpc_server.cc index 5a87258901..f35e268f6a 100644 --- a/paddle/fluid/operators/detail/grpc_server.cc +++ b/paddle/fluid/operators/distributed/grpc_server.cc @@ -15,13 +15,13 @@ limitations under the License. */ #include #include -#include "paddle/fluid/operators/detail/grpc_server.h" +#include "paddle/fluid/operators/distributed/grpc_server.h" using ::grpc::ServerAsyncResponseWriter; namespace paddle { namespace operators { -namespace detail { +namespace distributed { enum CallStatus { PROCESS = 0, FINISH }; // reference: @@ -41,6 +41,19 @@ class RequestBase { virtual ~RequestBase() {} virtual void Process() = 0; + std::string Status2String(const std::string& method) { + std::string status = "Process"; + if (status_ == FINISH) { + status = "Finish"; + } + + std::ostringstream s; + s << method << " name:[" << GetReqName() << "]" + << ", ep:[" << ctx_.peer() << "]" + << " " << status << " using req_id:" << req_id_; + return s.str(); + } + CallStatus Status() const { std::lock_guard l(status_mu_); return status_; @@ -74,7 +87,7 @@ class RequestSend final : public RequestBase { request_.reset(new VariableResponse(request_handler->scope(), request_handler->dev_ctx(), !request_handler->sync_mode())); - int method_id = static_cast(detail::GrpcMethod::kSendVariable); + int method_id = static_cast(distributed::GrpcMethod::kSendVariable); service_->RequestAsyncUnary( method_id, &ctx_, request_.get(), &responder_, cq_, cq_, reinterpret_cast(static_cast(req_id))); @@ -84,7 +97,7 @@ class RequestSend final : public RequestBase { void Process() override { std::string varname = GetReqName(); - VLOG(3) << "RequestSend var_name:" << varname; + VLOG(4) << "RequestSend var_name:" << varname; auto scope = request_->GetMutableLocalScope(); auto invar = request_->GetVar(); @@ -106,7 +119,7 @@ class RequestGet final : public RequestBase { ::grpc::ServerCompletionQueue* cq, RequestHandler* request_handler, int req_id) : RequestBase(service, cq, request_handler, req_id), responder_(&ctx_) { - auto method_id = static_cast(detail::GrpcMethod::kGetVariable); + auto method_id = static_cast(distributed::GrpcMethod::kGetVariable); service_->RequestAsyncUnary( method_id, &ctx_, &request_, &responder_, cq_, cq_, reinterpret_cast(static_cast(req_id))); @@ -119,7 +132,7 @@ class RequestGet final : public RequestBase { void Process() override { // proc request. std::string varname = request_.varname(); - VLOG(3) << "RequestGet " << varname; + VLOG(4) << "RequestGet " << varname; auto scope = request_handler_->scope(); auto invar = scope->FindVar(varname); @@ -150,7 +163,8 @@ class RequestPrefetch final : public RequestBase { local_scope_(nullptr) { request_.reset(new VariableResponse(request_handler->scope(), request_handler->dev_ctx(), true)); - int method_id = static_cast(detail::GrpcMethod::kPrefetchVariable); + int method_id = + static_cast(distributed::GrpcMethod::kPrefetchVariable); service_->RequestAsyncUnary( method_id, &ctx_, request_.get(), &responder_, cq_, cq_, reinterpret_cast(static_cast(req_id))); @@ -164,7 +178,7 @@ class RequestPrefetch final : public RequestBase { // prefetch process... std::string in_var_name = request_->Varname(); std::string out_var_name = request_->OutVarname(); - VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name + VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name << " out_var_name: " << out_var_name; auto scope = request_->GetMutableLocalScope(); @@ -186,11 +200,50 @@ class RequestPrefetch final : public RequestBase { framework::Scope* local_scope_; }; +class RequestCheckpointNotify final : public RequestBase { + public: + explicit RequestCheckpointNotify(GrpcService::AsyncService* service, + ::grpc::ServerCompletionQueue* cq, + RequestHandler* request_handler, int req_id) + : RequestBase(service, cq, request_handler, req_id), responder_(&ctx_) { + request_.reset(new VariableResponse(request_handler->scope(), + request_handler->dev_ctx())); + int method_id = + static_cast(distributed::GrpcMethod::kCheckpointNotify); + service_->RequestAsyncUnary( + method_id, &ctx_, request_.get(), &responder_, cq_, cq_, + reinterpret_cast(static_cast(req_id))); + } + + virtual ~RequestCheckpointNotify() {} + + std::string GetReqName() override { return request_->Varname(); } + + void Process() override { + auto scope = request_->GetMutableLocalScope(); + + std::string checkpoint_notify = request_->Varname(); + std::string checkpoint_dir = request_->OutVarname(); + + VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify + << ", dir: " << checkpoint_dir; + + request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr, + checkpoint_dir); + Finish(reply_, &responder_); + } + + protected: + std::shared_ptr request_; + sendrecv::VoidMessage reply_; + ServerAsyncResponseWriter responder_; +}; + void AsyncGRPCServer::WaitServerReady() { - VLOG(3) << "AsyncGRPCServer is wait server ready"; + VLOG(4) << "AsyncGRPCServer is wait server ready"; std::unique_lock lock(this->mutex_ready_); condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); - VLOG(3) << "AsyncGRPCServer WaitSeverReady"; + VLOG(4) << "AsyncGRPCServer WaitSeverReady"; } void AsyncGRPCServer::StartServer() { @@ -223,13 +276,14 @@ void AsyncGRPCServer::StartServer() { reqs.reserve(kRequestBufSize); for (int i = 0; i < kRequestBufSize; i++) { + VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i; TryToRegisterNewOne(rpc_name, i); } for (int i = 0; i < threadnum; i++) { rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); - VLOG(3) << t.first << " creates threads!"; + VLOG(4) << t.first << " creates threads!"; } } @@ -246,7 +300,7 @@ void AsyncGRPCServer::StartServer() { auto& threads = t.second; for (size_t i = 0; i < threads.size(); ++i) { threads[i]->join(); - VLOG(3) << t.first << " threads ends!"; + VLOG(4) << t.first << " threads ends!"; } } } @@ -254,7 +308,7 @@ void AsyncGRPCServer::StartServer() { void AsyncGRPCServer::ShutdownQueue() { for (auto& t : rpc_cq_) { t.second->Shutdown(); - VLOG(3) << t.first << " shutdown!"; + VLOG(4) << t.first << " queue shutdown!"; } } @@ -263,7 +317,7 @@ void AsyncGRPCServer::ShutDownImpl() { is_shut_down_ = true; ShutdownQueue(); - VLOG(3) << "server_ shutdown!"; + VLOG(4) << "server_ shutdown!"; server_->Shutdown(); } @@ -271,12 +325,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, int req_id) { std::unique_lock lock(cq_mutex_); if (is_shut_down_) { - VLOG(3) << "shutdown, do not TryToRegisterNewSendOne"; + VLOG(4) << "shutdown, do not TryToRegisterNewSendOne"; return; } - VLOG(4) << "register send rpc_name:" << rpc_name - << ", handler:" << rpc_call_map_[kRequestSend]; + VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name + << " REQ ID: " << req_id; auto& reqs = rpc_reqs_[rpc_name]; auto& handler = rpc_call_map_[rpc_name]; @@ -289,6 +343,8 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, b = new RequestGet(&service_, cq.get(), handler, req_id); } else if (rpc_name == kRequestPrefetch) { b = new RequestPrefetch(&service_, cq.get(), handler, req_id); + } else if (rpc_name == kRequestCheckpoint) { + b = new RequestCheckpointNotify(&service_, cq.get(), handler, req_id); } else { PADDLE_ENFORCE(false, "not supported rpc"); } @@ -305,14 +361,14 @@ void AsyncGRPCServer::HandleRequest( bool ok = false; while (true) { - VLOG(3) << "HandleRequest " << rpc_name << " wait next"; + VLOG(4) << "HandleRequest " << rpc_name << " wait next"; if (!cq->Next(&tag, &ok)) { - LOG(INFO) << "CompletionQueue " << rpc_name << " shutdown!"; + VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!"; break; } int req_id = static_cast(reinterpret_cast(tag)); - VLOG(3) << "HandleRequest " << rpc_name << ", req_id:" << req_id + VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id << " get next"; auto& reqs = rpc_reqs_[rpc_name]; @@ -323,22 +379,21 @@ void AsyncGRPCServer::HandleRequest( base = reqs[req_id]; } + VLOG(3) << base->Status2String(rpc_name); + // reference: // https://github.com/tensorflow/tensorflow/issues/5596 // https://groups.google.com/forum/#!topic/grpc-io/xftlRy-IQwM // https://groups.google.com/forum/#!topic/grpc-io/ywATt88Ef_I if (!ok) { LOG(WARNING) << "completion queue:" << rpc_name - << " recv no regular event:argument name[" - << base->GetReqName() << "]"; + << " recv no regular event" + << " context:" << base->Status2String(rpc_name); TryToRegisterNewOne(rpc_name, req_id); delete base; continue; } - VLOG(3) << "queue id:" << rpc_name << ", req_id:" << req_id - << ", status:" << base->Status(); - switch (base->Status()) { case PROCESS: { base->Process(); @@ -354,6 +409,6 @@ void AsyncGRPCServer::HandleRequest( } } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/grpc_server.h b/paddle/fluid/operators/distributed/grpc_server.h similarity index 85% rename from paddle/fluid/operators/detail/grpc_server.h rename to paddle/fluid/operators/distributed/grpc_server.h index f1db7590f6..d2524f5e65 100644 --- a/paddle/fluid/operators/detail/grpc_server.h +++ b/paddle/fluid/operators/distributed/grpc_server.h @@ -29,17 +29,17 @@ limitations under the License. */ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/detail/grpc_service.h" -#include "paddle/fluid/operators/detail/request_handler.h" -#include "paddle/fluid/operators/detail/rpc_server.h" -#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" +#include "paddle/fluid/operators/distributed/grpc_service.h" +#include "paddle/fluid/operators/distributed/request_handler.h" +#include "paddle/fluid/operators/distributed/rpc_server.h" +#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { class RequestBase; @@ -84,6 +84,6 @@ class AsyncGRPCServer final : public RPCServer { std::map> rpc_reqs_; }; -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/detail/grpc_service.h b/paddle/fluid/operators/distributed/grpc_service.h similarity index 83% rename from paddle/fluid/operators/detail/grpc_service.h rename to paddle/fluid/operators/distributed/grpc_service.h index e0505c2b9d..cdc4e7b792 100644 --- a/paddle/fluid/operators/detail/grpc_service.h +++ b/paddle/fluid/operators/distributed/grpc_service.h @@ -23,7 +23,7 @@ #include #include #include -#include "paddle/fluid/operators/detail/variable_response.h" +#include "paddle/fluid/operators/distributed/variable_response.h" #include "paddle/fluid/platform/profiler.h" @@ -42,24 +42,25 @@ class ServerContext; // Support parsing/unparsing of tensorflow::VariableResponse. // Wire-format is identical to RecvVariableResponse. template <> -class SerializationTraits { +class SerializationTraits { public: static Status Serialize( - const paddle::operators::detail::VariableResponse& msg, + const paddle::operators::distributed::VariableResponse& msg, grpc_byte_buffer** bp, bool* own_buffer) { PADDLE_ENFORCE(false, "SerializationTraits::Serialize not implemented!"); return Status(); } - static Status Deserialize(grpc_byte_buffer* buffer, - paddle::operators::detail::VariableResponse* msg, - int max_message_size = INT_MAX) { + static Status Deserialize( + grpc_byte_buffer* buffer, + paddle::operators::distributed::VariableResponse* msg, + int max_message_size = INT_MAX) { if (buffer == nullptr) { return Status(StatusCode::INTERNAL, "No payload"); } Status result = g_core_codegen_interface->ok(); if (result.ok()) { - paddle::operators::detail::GrpcByteSource source(buffer); + paddle::operators::distributed::GrpcByteSource source(buffer); int ret = msg->Parse(&source); if (ret != 0) { result = Status(StatusCode::INTERNAL, "VariableResponse parse error"); @@ -73,16 +74,17 @@ class SerializationTraits { namespace paddle { namespace operators { -namespace detail { +namespace distributed { enum class GrpcMethod { kSendVariable, kGetVariable, kPrefetchVariable, + kCheckpointNotify, }; static const int kGrpcNumMethods = - static_cast(GrpcMethod::kPrefetchVariable) + 1; + static_cast(GrpcMethod::kCheckpointNotify) + 1; inline const char* GrpcMethodName(GrpcMethod id) { switch (id) { @@ -92,6 +94,8 @@ inline const char* GrpcMethodName(GrpcMethod id) { return "/sendrecv.SendRecvService/GetVariable"; case GrpcMethod::kPrefetchVariable: return "/sendrecv.SendRecvService/PrefetchVariable"; + case GrpcMethod::kCheckpointNotify: + return "/sendrecv.SendRecvService/CheckpointNotify"; } // Shouldn't be reached. @@ -118,6 +122,6 @@ class GrpcService final { }; }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/proto_encoder_helper.h b/paddle/fluid/operators/distributed/proto_encoder_helper.h similarity index 98% rename from paddle/fluid/operators/detail/proto_encoder_helper.h rename to paddle/fluid/operators/distributed/proto_encoder_helper.h index d91d054b25..2fab02e32f 100644 --- a/paddle/fluid/operators/detail/proto_encoder_helper.h +++ b/paddle/fluid/operators/distributed/proto_encoder_helper.h @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -namespace detail { +namespace distributed { char* EncodeVarint32(char* dst, uint32_t v) { // Operate on characters as unsigneds @@ -144,6 +144,6 @@ class ProtoEncodeHelper { char* limit_; // Just for CHECKs }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/request_handler.h b/paddle/fluid/operators/distributed/request_handler.h similarity index 87% rename from paddle/fluid/operators/detail/request_handler.h rename to paddle/fluid/operators/distributed/request_handler.h index a2d08747d5..271306d5d2 100644 --- a/paddle/fluid/operators/detail/request_handler.h +++ b/paddle/fluid/operators/distributed/request_handler.h @@ -31,16 +31,23 @@ namespace paddle { namespace operators { -namespace detail { +namespace distributed { constexpr char kRequestSend[] = "RequestSend"; constexpr char kRequestGet[] = "RequestGet"; constexpr char kRequestPrefetch[] = "RequestPrefetch"; +constexpr char kRequestCheckpoint[] = "RequestCheckpoint"; +constexpr char kRequestPassBarrier[] = "RequestPassBarrier"; #define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV" #define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV" #define FETCH_BARRIER_MESSAGE "FETCH_BARRIER@RECV" #define COMPLETE_MESSAGE "COMPLETE@RECV" +#define BEGIN_PASS_MESSAGE "BEGIN_PASS@RECV" +#define END_PASS_MESSAGE "END_PASS@RECV" + +#define CHECKPOINT_SAVE_MESSAGE "SAVE@CHECKPOINTNOTIFY" +#define CHECKPOINT_LOAD_MESSAGE "LOAD@CHECKPOINTNOTIFY" class RPCServer; @@ -69,6 +76,11 @@ class RequestHandler { prefetch_var_name_to_prepared_ctx_ = g; } + void SetCheckpointNotifyPreparedCtx( + std::shared_ptr g) { + checkpoint_prepared_ctx_ = g; + } + // Used for async. void SetGradToPreparedCtx( std::unordered_map< @@ -115,6 +127,8 @@ class RequestHandler { std::unordered_map>* prefetch_var_name_to_prepared_ctx_; + // used for checkpoint notify + std::shared_ptr checkpoint_prepared_ctx_; // Used for async. std::unordered_mapIncreaseBatchBarrier(kRequestSend); - } else if (varname == COMPLETE_MESSAGE) { - VLOG(3) << "sync: recv complete message"; - rpc_server_->DecreaseClientNum(); + } else if (varname == BEGIN_PASS_MESSAGE) { + VLOG(3) << "sync: recv begin pass message"; + rpc_server_->WaitCond(kRequestSend); + rpc_server_->BeginPass(); } else { VLOG(3) << "sync: received var_name: " << varname; - if (sync_mode_) { - rpc_server_->WaitCond(kRequestSend); - } + rpc_server_->WaitCond(kRequestSend); + VLOG(3) << "sync: processing received var: " << varname; if (invar == nullptr) { LOG(ERROR) << "sync: Can not find server side var: " << varname; @@ -86,21 +91,21 @@ bool RequestGetHandler::Handle(const std::string& varname, framework::Variable** outvar, const std::string& out_var_name) { VLOG(4) << "RequestGetHandler:" << varname; - - if (varname != FETCH_BARRIER_MESSAGE) { - if (sync_mode_) { + if (sync_mode_) { + if (varname == FETCH_BARRIER_MESSAGE) { + VLOG(3) << "sync: recv fetch barrier message"; + rpc_server_->IncreaseBatchBarrier(kRequestGet); + } else if (varname == END_PASS_MESSAGE) { + rpc_server_->EndPass(); + } else { rpc_server_->WaitCond(kRequestGet); + *outvar = scope_->FindVar(varname); + } + } else { + if (varname != FETCH_BARRIER_MESSAGE && varname != END_PASS_MESSAGE) { + *outvar = scope_->FindVar(varname); } - *outvar = scope_->FindVar(varname); - return true; - } - - // FETCH_BARRIER_MESSAGE - if (sync_mode_) { - VLOG(3) << "sync: recv fetch barrier message"; - rpc_server_->IncreaseBatchBarrier(kRequestGet); } - return true; } @@ -119,6 +124,24 @@ bool RequestPrefetchHandler::Handle(const std::string& varname, return true; } -} // namespace detail +bool RequestCheckpointHandler::Handle(const std::string& varname, + framework::Scope* scope, + framework::Variable* invar, + framework::Variable** outvar, + const std::string& out_var_name) { + PADDLE_ENFORCE( + checkpoint_notify_id != -1, + "when checkpoint_notify_id = -1, there should be no RPC invoke."); + + auto* lt_var = scope->FindVar(LOOKUP_TABLE_PATH)->GetMutable(); + lt_var->clear(); + lt_var->append(out_var_name); + VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: " + << out_var_name; + executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope); + return true; +} + +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/request_handler_impl.h b/paddle/fluid/operators/distributed/request_handler_impl.h similarity index 79% rename from paddle/fluid/operators/detail/request_handler_impl.h rename to paddle/fluid/operators/distributed/request_handler_impl.h index 3f77c09a95..87185500f2 100644 --- a/paddle/fluid/operators/detail/request_handler_impl.h +++ b/paddle/fluid/operators/distributed/request_handler_impl.h @@ -28,11 +28,11 @@ #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/detail/request_handler.h" +#include "paddle/fluid/operators/distributed/request_handler.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { class RequestSendHandler final : public RequestHandler { public: @@ -66,6 +66,21 @@ class RequestPrefetchHandler final : public RequestHandler { const std::string& out_var_name = "") override; }; -} // namespace detail +class RequestCheckpointHandler final : public RequestHandler { + public: + explicit RequestCheckpointHandler(bool sync_mode, int checkpoint_notify_id) + : RequestHandler(sync_mode) { + this->checkpoint_notify_id = checkpoint_notify_id; + } + virtual ~RequestCheckpointHandler() {} + bool Handle(const std::string& varname, framework::Scope* scope, + framework::Variable* var, framework::Variable** outvar, + const std::string& out_var_name = "") override; + + private: + int checkpoint_notify_id; +}; + +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/rpc_client.cc b/paddle/fluid/operators/distributed/rpc_client.cc similarity index 75% rename from paddle/fluid/operators/detail/rpc_client.cc rename to paddle/fluid/operators/distributed/rpc_client.cc index 9a791403e3..b5ec9fe536 100644 --- a/paddle/fluid/operators/detail/rpc_client.cc +++ b/paddle/fluid/operators/distributed/rpc_client.cc @@ -12,15 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/detail/rpc_client.h" +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "gflags/gflags.h" + +// default to 3min to avoid temprary network failures. +DEFINE_int32(rpc_deadline, 180000, "deadline timeouts for rpc"); namespace paddle { namespace operators { -namespace detail { +namespace distributed { std::once_flag RPCClient::init_flag_; std::unique_ptr RPCClient::rpc_client_(nullptr); -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/rpc_client.h b/paddle/fluid/operators/distributed/rpc_client.h similarity index 66% rename from paddle/fluid/operators/detail/rpc_client.h rename to paddle/fluid/operators/distributed/rpc_client.h index 47c6ffb4fd..6479d3a97b 100644 --- a/paddle/fluid/operators/detail/rpc_client.h +++ b/paddle/fluid/operators/distributed/rpc_client.h @@ -15,14 +15,17 @@ #pragma once #include +#include "gflags/gflags.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" +DECLARE_int32(rpc_deadline); + namespace paddle { namespace operators { -namespace detail { +namespace distributed { class RPCClient { public: @@ -32,35 +35,44 @@ class RPCClient { const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = rpc_time_out) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual bool AsyncGetVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& var_name, - int64_t time_out = rpc_time_out) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual bool AsyncPrefetchVar(const std::string& ep, const platform::DeviceContext& ctx, const framework::Scope& scope, const std::string& in_var_name, const std::string& out_var_name, - int64_t time_out = rpc_time_out) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual void AsyncSendBatchBarrier(const std::string& ep, - int64_t time_out = rpc_time_out) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; virtual void AsyncSendFetchBarrier(const std::string& ep, - int64_t time_out = rpc_time_out) = 0; + int64_t time_out = FLAGS_rpc_deadline) = 0; - // SendComplete tells all the server that current trainer have no more data - // to train, so that the pserver can reduce it's barrier count, and continue - // to train with other trainers. - virtual void SendComplete() = 0; + virtual void AsyncCheckpointNotify(const std::string& ep, + const std::string& dir, + int64_t time_out = FLAGS_rpc_deadline) = 0; - virtual void Wait() = 0; + virtual void AsyncSendBeginPass(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) = 0; + + virtual void AsyncSendEndPass(const std::string& ep, + int64_t time_out = FLAGS_rpc_deadline) = 0; - static constexpr int64_t rpc_time_out = 120 * 1000; + // BeginePass/EndPass tells all the pserver that start/end a pass, so that + // the pserver can increase/reduce it's barrier count, and continue to train + // with other trainers. + virtual void SendBeginPass() = 0; + virtual void SendEndPass() = 0; + + virtual void Wait() = 0; template static RPCClient* GetInstance() { @@ -84,6 +96,6 @@ class RPCClient { static std::once_flag init_flag_; static std::unique_ptr rpc_client_; }; -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc similarity index 75% rename from paddle/fluid/operators/detail/rpc_server.cc rename to paddle/fluid/operators/distributed/rpc_server.cc index cd0fe96e23..d49ee34eea 100644 --- a/paddle/fluid/operators/detail/rpc_server.cc +++ b/paddle/fluid/operators/distributed/rpc_server.cc @@ -17,11 +17,11 @@ #include #include -#include "paddle/fluid/operators/detail/rpc_server.h" +#include "paddle/fluid/operators/distributed/rpc_server.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { void RPCServer::ShutDown() { LOG(INFO) << "RPCServer ShutDown "; @@ -44,14 +44,16 @@ void RPCServer::SavePort() const { void RPCServer::WaitBarrier(const std::string& rpc_name) { std::unique_lock lock(this->mutex_); barrier_cond_.wait(lock, [this, &rpc_name] { - return (barrier_counter_[rpc_name] >= client_num_ || exit_flag_.load()); + return ((barrier_counter_[rpc_name] == client_num_ && client_num_ != 0) || + exit_flag_.load()); }); - VLOG(3) << "batch_barrier_:" << barrier_counter_[rpc_name]; + VLOG(3) << "batch_barrier_: " << rpc_name << " " + << barrier_counter_[rpc_name]; } void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { - VLOG(3) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; + VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; int b = 0; std::unique_lock lock(mutex_); b = ++barrier_counter_[rpc_name]; @@ -62,10 +64,25 @@ void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { } } -void RPCServer::DecreaseClientNum() { +void RPCServer::BeginPass() { + VLOG(4) << "RPCServer begin increase pass barrier"; + { + std::unique_lock lock(mutex_); + client_num_++; + VLOG(4) << "increase client_num to: " << client_num_; + } + barrier_cond_.notify_all(); +} + +void RPCServer::EndPass() { + VLOG(4) << "RPCServer begin increase pass barrier"; { std::unique_lock lock(mutex_); client_num_--; + VLOG(4) << "decrease client_num to: " << client_num_; + if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) { + barrier_counter_[kRequestGet]--; + } } barrier_cond_.notify_all(); } @@ -100,7 +117,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { } void RPCServer::WaitCond(const std::string& rpc_name) { - VLOG(3) << "RPCServer WaitCond " << rpc_name; + VLOG(4) << "RPCServer WaitCond " << rpc_name; int cond = 0; { std::unique_lock lock(mutex_); @@ -112,6 +129,6 @@ void RPCServer::WaitCond(const std::string& rpc_name) { lock, [=] { return (cur_cond_.load() == cond || exit_flag_.load()); }); } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/rpc_server.h b/paddle/fluid/operators/distributed/rpc_server.h similarity index 93% rename from paddle/fluid/operators/detail/rpc_server.h rename to paddle/fluid/operators/distributed/rpc_server.h index 2e3342428c..833991c8aa 100644 --- a/paddle/fluid/operators/detail/rpc_server.h +++ b/paddle/fluid/operators/distributed/rpc_server.h @@ -19,11 +19,11 @@ #include // NOLINT #include #include -#include "paddle/fluid/operators/detail/request_handler.h" +#include "paddle/fluid/operators/distributed/request_handler.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { class RPCServer { public: @@ -43,6 +43,9 @@ class RPCServer { bool IsExit() { return exit_flag_.load(); } int GetSelectedPort() const { return selected_port_; } + + int GetClientNum() const; + void SavePort() const; // RegisterRPC, register the rpc method name to a handler @@ -60,7 +63,10 @@ class RPCServer { void SetCond(const std::string& rpc_name); void WaitCond(const std::string& rpc_name); void IncreaseBatchBarrier(const std::string rpc_name); - void DecreaseClientNum(); + + void BeginPass(); + void EndPass(); + void ResetBarrierCounter(); protected: @@ -86,6 +92,6 @@ class RPCServer { friend class RequestHandler; }; -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/detail/rpc_server_test.cc b/paddle/fluid/operators/distributed/rpc_server_test.cc similarity index 87% rename from paddle/fluid/operators/detail/rpc_server_test.cc rename to paddle/fluid/operators/distributed/rpc_server_test.cc index 463a7b80cf..a0693cffab 100644 --- a/paddle/fluid/operators/detail/rpc_server_test.cc +++ b/paddle/fluid/operators/distributed/rpc_server_test.cc @@ -22,18 +22,18 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/detail/macros.h" -#include "paddle/fluid/operators/detail/request_handler_impl.h" -#include "paddle/fluid/operators/detail/rpc_client.h" -#include "paddle/fluid/operators/detail/rpc_server.h" +#include "paddle/fluid/operators/distributed/request_handler_impl.h" +#include "paddle/fluid/operators/distributed/rpc_client.h" +#include "paddle/fluid/operators/distributed/rpc_server.h" namespace framework = paddle::framework; namespace platform = paddle::platform; -namespace detail = paddle::operators::detail; +namespace distributed = paddle::operators::distributed; USE_OP(lookup_table); -std::unique_ptr g_rpc_service; -std::unique_ptr g_req_handler; +std::unique_ptr g_rpc_service; +std::unique_ptr g_req_handler; framework::BlockDesc* AppendPrefetchBlcok(framework::ProgramDesc* program) { auto root_block = program->MutableBlock(0); @@ -113,19 +113,21 @@ void StartServer() { g_req_handler->SetScope(&scope); g_req_handler->SetExecutor(&exe); - g_rpc_service->RegisterRPC(detail::kRequestPrefetch, g_req_handler.get()); + g_rpc_service->RegisterRPC(distributed::kRequestPrefetch, + g_req_handler.get()); g_req_handler->SetRPCServer(g_rpc_service.get()); std::thread server_thread( - std::bind(&detail::RPCServer::StartServer, g_rpc_service.get())); + std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get())); server_thread.join(); } TEST(PREFETCH, CPU) { - g_req_handler.reset(new detail::RequestPrefetchHandler(true)); + g_req_handler.reset(new distributed::RequestPrefetchHandler(true)); g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1)); - detail::RPCClient* client = detail::RPCClient::GetInstance(); + distributed::RPCClient* client = + distributed::RPCClient::GetInstance(); std::thread server_thread(StartServer); g_rpc_service->WaitServerReady(); diff --git a/paddle/fluid/operators/detail/send_recv.proto b/paddle/fluid/operators/distributed/send_recv.proto similarity index 97% rename from paddle/fluid/operators/detail/send_recv.proto rename to paddle/fluid/operators/distributed/send_recv.proto index 54cb93e04d..e0902320cf 100644 --- a/paddle/fluid/operators/detail/send_recv.proto +++ b/paddle/fluid/operators/distributed/send_recv.proto @@ -25,6 +25,8 @@ service SendRecvService { rpc GetVariable(VariableMessage) returns (VariableMessage) {} // pre-fetch variable by given variable name and Ids rpc PrefetchVariable(VariableMessage) returns (VariableMessage) {} + + rpc CheckpointNotify(VariableMessage) returns (VoidMessage) {} } // VariableMessage is serialized paddle variable message. diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.cc b/paddle/fluid/operators/distributed/sendrecvop_utils.cc similarity index 95% rename from paddle/fluid/operators/detail/sendrecvop_utils.cc rename to paddle/fluid/operators/distributed/sendrecvop_utils.cc index 507b465435..98129d9f10 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.cc +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" +#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" #ifdef PADDLE_WITH_CUDA #include @@ -23,14 +23,14 @@ limitations under the License. */ #include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/operators/detail/bytebuffer_stream.h" -#include "paddle/fluid/operators/detail/proto_encoder_helper.h" -#include "paddle/fluid/operators/detail/variable_response.h" +#include "paddle/fluid/operators/distributed/bytebuffer_stream.h" +#include "paddle/fluid/operators/distributed/proto_encoder_helper.h" +#include "paddle/fluid/operators/distributed/variable_response.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { using VarMsg = sendrecv::VariableMessage; @@ -222,11 +222,11 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg, const platform::DeviceContext& ctx, const framework::Scope* scope, framework::Variable** var) { - operators::detail::VariableResponse resp(scope, &ctx); + operators::distributed::VariableResponse resp(scope, &ctx); PADDLE_ENFORCE(resp.Parse(msg) == 0, "parse bytebuffer to tensor error!"); *var = resp.GetVar(); } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/sendrecvop_utils.h b/paddle/fluid/operators/distributed/sendrecvop_utils.h similarity index 92% rename from paddle/fluid/operators/detail/sendrecvop_utils.h rename to paddle/fluid/operators/distributed/sendrecvop_utils.h index bd16bf1dab..fe25e73fa6 100644 --- a/paddle/fluid/operators/detail/sendrecvop_utils.h +++ b/paddle/fluid/operators/distributed/sendrecvop_utils.h @@ -25,12 +25,12 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { typedef void (*DestroyCallback)(void*); @@ -61,6 +61,6 @@ inline std::type_index ToTypeIndex(sendrecv::VariableMessage::Type type) { } } -} // namespace detail +} // namespace distributed } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/detail/variable_response.cc b/paddle/fluid/operators/distributed/variable_response.cc similarity index 94% rename from paddle/fluid/operators/detail/variable_response.cc rename to paddle/fluid/operators/distributed/variable_response.cc index 24cb91a3bb..45832c60bf 100644 --- a/paddle/fluid/operators/detail/variable_response.cc +++ b/paddle/fluid/operators/distributed/variable_response.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/detail/variable_response.h" +#include "paddle/fluid/operators/distributed/variable_response.h" #include #include @@ -22,12 +22,12 @@ #endif #include "paddle/fluid/platform/profiler.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" -#include "paddle/fluid/operators/detail/sendrecvop_utils.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/sendrecvop_utils.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { enum WireType { WIRETYPE_VARINT = 0, @@ -76,6 +76,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, if (total_written + size_to_write > length) { size_to_write = length - total_written; } + // This log is useful to see how long a internal block size is of rpc. + VLOG(7) << "copy " << size_to_write << " data to CUDAPlace"; memory::Copy(boost::get(place), reinterpret_cast(p), cpu, data, size_to_write, gpu_dev_ctx.stream()); @@ -103,6 +105,8 @@ bool ReadRaw(::google::protobuf::io::CodedInputStream* input, } // TODO(gongwb): can we avoid copy? platform::CPUPlace cpu; + // This log is useful to see how long a internal block size is of rpc. + VLOG(7) << "copy " << size_to_write << " data to CPUPlace"; memory::Copy(cpu, reinterpret_cast(p), cpu, data, size_to_write); p += size_to_write; @@ -158,13 +162,13 @@ bool VariableResponse::CopySelectRowsTensorData( slr->set_height(meta_.slr_height()); auto* tensor = slr->mutable_value(); tensor->Resize(dims); - PADDLE_ENFORCE_EQ( - static_cast(tensor->numel()), - length / framework::SizeOfType( - paddle::operators::detail::ToTypeIndex(meta_.data_type()))); + PADDLE_ENFORCE_EQ(static_cast(tensor->numel()), + length / framework::SizeOfType( + paddle::operators::distributed::ToTypeIndex( + meta_.data_type()))); void* tensor_data = tensor->mutable_data( ctx.GetPlace(), - paddle::operators::detail::ToTypeIndex(meta_.data_type())); + paddle::operators::distributed::ToTypeIndex(meta_.data_type())); if (!ReadRaw(input, ctx, tensor->place(), tensor_data, length)) { return false; @@ -480,6 +484,6 @@ int VariableResponse::Parse(Source* source) { return 0; } -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/detail/variable_response.h b/paddle/fluid/operators/distributed/variable_response.h similarity index 92% rename from paddle/fluid/operators/detail/variable_response.h rename to paddle/fluid/operators/distributed/variable_response.h index 69cfd784f8..1db4a0a522 100644 --- a/paddle/fluid/operators/detail/variable_response.h +++ b/paddle/fluid/operators/distributed/variable_response.h @@ -22,17 +22,17 @@ #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/var_type.h" -#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h" -#include "paddle/fluid/operators/detail/send_recv.pb.h" +#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h" +#include "paddle/fluid/operators/distributed/send_recv.pb.h" #include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/zero_copy_stream.h" #include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/operators/detail/bytebuffer_stream.h" +#include "paddle/fluid/operators/distributed/bytebuffer_stream.h" namespace paddle { namespace operators { -namespace detail { +namespace distributed { class VariableResponse { public: @@ -99,6 +99,6 @@ class VariableResponse { sendrecv::VariableMessage meta_; }; -}; // namespace detail +}; // namespace distributed }; // namespace operators }; // namespace paddle diff --git a/paddle/fluid/operators/elementwise_add_mkldnn_op.cc b/paddle/fluid/operators/elementwise_add_mkldnn_op.cc new file mode 100644 index 0000000000..1a5427b392 --- /dev/null +++ b/paddle/fluid/operators/elementwise_add_mkldnn_op.cc @@ -0,0 +1,190 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/elementwise_add_op.h" +#include "paddle/fluid/operators/elementwise_op_function.h" + +#include "paddle/fluid/platform/mkldnn_helper.h" + +namespace paddle { +namespace operators { + +using framework::DataLayout; +using framework::Tensor; +using mkldnn::memory; +using mkldnn::reorder; +using mkldnn::primitive; +using mkldnn::stream; +using mkldnn::sum; + +template +class EltwiseAddMKLDNNKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + auto& dev_ctx = + ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); + + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* z = ctx.Output("Out"); + const T* x_data = x->data(); + const T* y_data = y->data(); + T* z_data = z->mutable_data(ctx.GetPlace()); + + int axis = ctx.Attr("axis"); + + auto x_dims = x->dims(); + auto y_dims = y->dims(); + auto z_dims = z->dims(); + + // Execute default elementwise_add operator when + // broadcast operations need to performed. + if (x_dims != y_dims) { + auto sum_func = [](T a, T b) -> T { return a + b; }; + + TransformFunctor + functor( + x, y, z, + ctx.template device_context(), + sum_func); + + axis = (axis == -1 ? x_dims.size() - y_dims.size() : axis); + PADDLE_ENFORCE(axis >= 0 && axis < x_dims.size(), + "Axis should be in range [0, x_dims)"); + + trim_trailing_singular_dims(&y_dims); + axis = (y_dims.size() == 0) ? x_dims.size() : axis; + + int pre, n, post; + get_mid_dims(x_dims, y_dims, axis, &pre, &n, &post); + + if (post == 1) { + functor.RunRowWise(n, pre); + } else { + functor.RunMidWise(n, pre, post); + } + z->set_layout(DataLayout::kMKLDNN); + z->set_format(x->format()); + } else { + PADDLE_ENFORCE(x->layout() == DataLayout::kMKLDNN && + x->format() != memory::format::format_undef, + "Wrong layout/format set for X tensor"); + PADDLE_ENFORCE(y->layout() == DataLayout::kMKLDNN && + y->format() != memory::format::format_undef, + "Wrong layout/format set for Y tensor"); + + std::vector src_x_tz = framework::vectorize2int(x_dims); + std::vector src_y_tz = framework::vectorize2int(y_dims); + std::vector dst_tz = framework::vectorize2int(z_dims); + + std::vector srcs_pd; + std::vector srcs; + std::vector scales = {1.0f, 1.0f}; + + auto src_x_pd = memory::primitive_desc( + {{src_x_tz}, memory::data_type::f32, x->format()}, mkldnn_engine); + auto src_y_pd = memory::primitive_desc( + {{src_y_tz}, memory::data_type::f32, y->format()}, mkldnn_engine); + auto src_x_memory = + memory(src_x_pd, paddle::platform::to_void_cast(x_data)); + auto src_y_memory = + memory(src_y_pd, paddle::platform::to_void_cast(y_data)); + + srcs_pd.push_back(src_x_pd); + srcs_pd.push_back(src_y_pd); + srcs.push_back(src_x_memory); + srcs.push_back(src_y_memory); + + auto dst_md = + memory::desc({dst_tz}, memory::data_type::f32, memory::format::any); + + // create primitive descriptor for sum + auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_pd); + + // create mkldnn memory for dst + memory dst_memory = memory(sum_pd.dst_primitive_desc(), z_data); + + std::vector inputs; + inputs.push_back(srcs[0]); + inputs.push_back(srcs[1]); + + // create sum primitive + auto sum_prim = sum(sum_pd, inputs, dst_memory); + + std::vector pipeline; + pipeline.push_back(sum_prim); + stream(stream::kind::eager).submit(pipeline).wait(); + + z->set_layout(DataLayout::kMKLDNN); + z->set_format( + (memory::format)dst_memory.get_primitive_desc().desc().data.format); + } + } +}; + +template +class EltwiseAddMKLDNNGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& ctx) const override { + using Tensor = framework::Tensor; + + auto* x = ctx.Input("X"); + auto* y = ctx.Input("Y"); + auto* out = ctx.Input("Out"); + auto* dout = ctx.Input(framework::GradVarName("Out")); + auto* dx = ctx.Output(framework::GradVarName("X")); + auto* dy = ctx.Output(framework::GradVarName("Y")); + int axis = ctx.Attr("axis"); + + auto set_mkldnn_format = [](Tensor* in, const Tensor* out) { + in->set_layout(DataLayout::kMKLDNN); + in->set_format(out->format()); + }; + + if (x->dims() == y->dims()) { + auto blas = math::GetBlas(ctx); + if (dx) { + blas.VCOPY(dout->numel(), dout->data(), + dx->mutable_data(ctx.GetPlace())); + set_mkldnn_format(dx, dout); + } + + if (dy) { + blas.VCOPY(dout->numel(), dout->data(), + dy->mutable_data(ctx.GetPlace())); + set_mkldnn_format(dy, dout); + } + } else { + // Execute default kernel when broadcast is needed + ElemwiseGradCompute, IdentityGrad>( + ctx, *x, *y, *out, *dout, axis, dx, dy, IdentityGrad(), + IdentityGrad()); + } + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_KERNEL(elementwise_add, MKLDNN, ::paddle::platform::CPUPlace, + ops::EltwiseAddMKLDNNKernel) + +REGISTER_OP_KERNEL(elementwise_add_grad, MKLDNN, ::paddle::platform::CPUPlace, + ops::EltwiseAddMKLDNNGradKernel) diff --git a/paddle/fluid/operators/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise_mul_op.cc index ba343909bb..7cd67e74de 100644 --- a/paddle/fluid/operators/elementwise_mul_op.cc +++ b/paddle/fluid/operators/elementwise_mul_op.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise_mul_op.h" #include "paddle/fluid/operators/elementwise_op.h" namespace ops = paddle::operators; -REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\odot\\ Y"); +REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y"); REGISTER_OP_CPU_KERNEL( elementwise_mul, ops::ElementwiseMulKernel, diff --git a/paddle/fluid/operators/elementwise_op.h b/paddle/fluid/operators/elementwise_op.h index 12364fff96..bb88970e42 100644 --- a/paddle/fluid/operators/elementwise_op.h +++ b/paddle/fluid/operators/elementwise_op.h @@ -14,8 +14,12 @@ limitations under the License. */ #pragma once #include +#include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif namespace paddle { namespace operators { @@ -40,6 +44,21 @@ class ElementwiseOp : public framework::OperatorWithKernel { ctx->SetOutputDim("Out", x_dim); ctx->ShareLoD("X", /*->*/ "Out"); } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + +#ifdef PADDLE_WITH_MKLDNN + if (platform::CanMKLDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } }; class ElementwiseOpInferVarType : public framework::VarTypeInference { @@ -65,6 +84,8 @@ class ElementwiseOpMaker : public framework::OpProtoAndCheckerMaker { "for broadcasting Y onto X.") .SetDefault(-1) .EqualGreaterThan(-1); + AddAttr("use_mkldnn", "(bool, default false). Used by MKLDNN.") + .SetDefault(false); AddComment(string::Sprintf(R"DOC( Limited Elementwise %s Operator @@ -138,6 +159,21 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel { ctx->SetOutputDim(y_grad_name, y_dims); } } + + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + +#ifdef PADDLE_WITH_MKLDNN + if (platform::CanMKLDNNBeUsed(ctx)) { + return framework::OpKernelType(input_data_type, ctx.GetPlace(), + framework::DataLayout::kMKLDNN, + framework::LibraryType::kMKLDNN); + } +#endif + return framework::OpKernelType(input_data_type, ctx.GetPlace()); + } }; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/fc_mkldnn_op.cc b/paddle/fluid/operators/fc_mkldnn_op.cc index 847b7b0c12..99fa659a35 100644 --- a/paddle/fluid/operators/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/fc_mkldnn_op.cc @@ -115,6 +115,7 @@ class MKLDNNMemory { template class FCMKLDNNOpKernel : public paddle::framework::OpKernel { + public: void Compute(const paddle::framework::ExecutionContext& ctx) const override { PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), "It must use CPUPlace."); diff --git a/paddle/fluid/operators/fetch_barrier_op.cc b/paddle/fluid/operators/fetch_barrier_op.cc index 98b051afb5..02beb80fc8 100644 --- a/paddle/fluid/operators/fetch_barrier_op.cc +++ b/paddle/fluid/operators/fetch_barrier_op.cc @@ -42,8 +42,8 @@ class FetchBarrierOp : public framework::OperatorBase { // For profiling platform::RecordEvent record_event(Type(), &ctx); - detail::RPCClient* rpc_client = - detail::RPCClient::GetInstance(); + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); rpc_client->Wait(); diff --git a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc index 8050f61d45..4a97428148 100644 --- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc @@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { void Apply() override { AddAttr("mean", "(float, default 0.0) " - "mean of random tensor.") + "The mean (or center) of the gaussian distribution.") .SetDefault(.0f); AddAttr("std", "(float, default 1.0) " - "std of random tensor.") + "The standard deviation (std, or spread) of the " + "gaussian distribution.") .SetDefault(1.0f); AddAttr("seed", "(int, default 0) " @@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { .SetDefault(framework::proto::VarType::FP32); AddComment(R"DOC( -GaussianRandom Operator. Used to initialize tensors with gaussian random generator. +The defalut mean of the distribution is 0. and defalut standard +deviation (std) of the distribution is 1.. Uers can set mean and std +by input arguments. )DOC"); } }; diff --git a/paddle/fluid/operators/gaussian_random_mkldnn_op.cc b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc new file mode 100644 index 0000000000..76b00b396c --- /dev/null +++ b/paddle/fluid/operators/gaussian_random_mkldnn_op.cc @@ -0,0 +1,55 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/operators/mean_op.h" + +namespace paddle { +namespace operators { + +using framework::DataLayout; +template +class GaussianMKLDNNKernel : public paddle::framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + float mean = context.Attr("mean"); + float std = context.Attr("std"); + auto* tensor = context.Output("Out"); + T* data = tensor->mutable_data(context.GetPlace()); + + unsigned int seed = static_cast(context.Attr("seed")); + std::minstd_rand engine; + if (seed == 0) { + seed = std::random_device()(); + } + engine.seed(seed); + std::normal_distribution dist(mean, std); + int64_t size = tensor->numel(); + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(engine); + } + + // The format of output is set as the mkldnn's format + // TODO(@mozga-intel) The format of matrix sets inside the another layers. + tensor->set_layout(DataLayout::kMKLDNN); + tensor->set_format(mkldnn::memory::format::oihw); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace, + ops::GaussianMKLDNNKernel); diff --git a/paddle/fluid/operators/gaussian_random_op.cc b/paddle/fluid/operators/gaussian_random_op.cc index 815c1bb509..1488aab192 100644 --- a/paddle/fluid/operators/gaussian_random_op.cc +++ b/paddle/fluid/operators/gaussian_random_op.cc @@ -15,6 +15,10 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_registry.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { @@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel { protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { + framework::LibraryType library{framework::LibraryType::kPlain}; + framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + +#ifdef PADDLE_WITH_MKLDNN + if (library == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; + } +#endif + return framework::OpKernelType( static_cast(ctx.Attr("dtype")), - ctx.device_context()); + ctx.device_context(), layout, library); } }; @@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker { "(int, default 5(FP32)) " "Output data type.") .SetDefault(framework::proto::VarType::FP32); - + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); AddComment(R"DOC( GaussianRandom Operator. diff --git a/paddle/fluid/operators/gen_nccl_id_op.cc b/paddle/fluid/operators/gen_nccl_id_op.cc index f824eee4e7..697c239e59 100644 --- a/paddle/fluid/operators/gen_nccl_id_op.cc +++ b/paddle/fluid/operators/gen_nccl_id_op.cc @@ -22,7 +22,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/operators/detail/macros.h" -#include "paddle/fluid/operators/detail/request_handler_impl.h" +#include "paddle/fluid/operators/distributed/request_handler_impl.h" #include "paddle/fluid/platform/nccl_helper.h" namespace paddle { @@ -60,7 +60,8 @@ class GenNCCLIdOp : public framework::OperatorBase { std::vector endpoint_list = Attr>("endpoint_list"); - detail::RPCClient* client = detail::RPCClient::GetInstance(); + distributed::RPCClient* client = + distributed::RPCClient::GetInstance(); for (auto& ep : endpoint_list) { VLOG(3) << "sending nccl id to " << ep; @@ -80,11 +81,11 @@ class GenNCCLIdOp : public framework::OperatorBase { // NOTE: Can not use unique_ptr here because the default // deleter will call GRPC Server's base class's dtor and // that will cause a wired crash. - detail::RequestSendHandler rpc_h(true); - std::unique_ptr rpc_service( + distributed::RequestSendHandler rpc_h(true); + std::unique_ptr rpc_service( new RPCSERVER_T(endpoint, 1)); - rpc_service->RegisterRPC(detail::kRequestSend, &rpc_h); + rpc_service->RegisterRPC(distributed::kRequestSend, &rpc_h); rpc_h.SetRPCServer(rpc_service.get()); framework::ProgramDesc empty_program; @@ -95,11 +96,11 @@ class GenNCCLIdOp : public framework::OperatorBase { rpc_h.SetExecutor(&executor); std::thread server_thread( - std::bind(&detail::RPCServer::StartServer, rpc_service.get())); + std::bind(&distributed::RPCServer::StartServer, rpc_service.get())); - rpc_service->SetCond(detail::kRequestSend); + rpc_service->SetCond(distributed::kRequestSend); VLOG(3) << "start getting nccl id from trainer 0..."; - rpc_service->WaitBarrier(detail::kRequestSend); + rpc_service->WaitBarrier(distributed::kRequestSend); VLOG(3) << "got nccl id and stop server..."; rpc_service->ShutDown(); VLOG(3) << "rpc server stopped"; diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index ab097d31e9..14ce1da2e9 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -62,36 +62,33 @@ class LayerNormOp : public framework::OperatorWithKernel { class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(LoDTensor) The input tensor."); + AddInput("X", "The input tensor."); AddInput("Scale", - "(Tensor, optional) Scale is a 1-dimensional tensor of size " + "(optional) Scale is a 1-dimensional tensor of size " "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." "It is applied to the output.") .AsDispensable(); AddInput("Bias", - "(Tensor, optional) Bias is a 1-dimensional tensor of size " + "(optional) Bias is a 1-dimensional tensor of size " "H(`begin_norm_axis` splits the tensor(`X`) to a matrix [N,H])." "It is applied to the output.") .AsDispensable(); - AddOutput("Y", "(LoDTensor) Result after normalization."); - AddOutput("Mean", "(Tensor) Mean of the current mini batch.") - .AsIntermediate(); - AddOutput("Variance", "(Tensor) Variance of the current mini batch.") + AddOutput("Y", "Result after normalization."); + AddOutput("Mean", "Mean of the current mini batch.").AsIntermediate(); + AddOutput("Variance", "Variance of the current mini batch.") .AsIntermediate(); AddAttr("epsilon", - "(float, default 1e-5) Constant for " - "numerical stability") + "Constant for numerical stability [default 1e-5].") .SetDefault(1e-5) .AddCustomChecker([](const float &epsilon) { PADDLE_ENFORCE(epsilon >= 0.0f && epsilon <= 0.001f, "'epsilon' should be between 0.0 and 0.001."); }); AddAttr("begin_norm_axis", - "(int default:1), the " - "axis of `begin_norm_axis ... Rank(X) - 1` will be " + "the axis of `begin_norm_axis ... Rank(X) - 1` will be " "normalized. `begin_norm_axis` splits the tensor(`X`) to a " - "matrix [N,H].") + "matrix [N,H]. [default 1].") .SetDefault(1) .AddCustomChecker([](const int &begin_norm_axis) { PADDLE_ENFORCE_GT(begin_norm_axis, 0, @@ -99,10 +96,14 @@ class LayerNormOpMaker : public framework::OpProtoAndCheckerMaker { }); AddComment(R"DOC( -Layer Normalization. -Layer Norm has been implemented as discussed in the paper: -https://arxiv.org/abs/1607.06450 -... +Assume feature vectors exist on dimensions +:attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics +along these dimensions for each feature vector :math:`a` with size +:math:`H`, then normalize each feature vector using the corresponding +statistics. After that, apply learnable gain and bias on the normalized +tensor to scale and shift if :attr:`scale` and :attr:`shift` are set. + +Refer to `Layer Normalization `_ )DOC"); } }; diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index a711da3627..ea1ca7f59d 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -84,6 +84,7 @@ CRF. Please refer to http://www.cs.columbia.edu/~mcollins/fb.pdf and http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf for details. Equation: + 1. Denote Input(Emission) to this operator as $x$ here. 2. The first D values of Input(Transition) to this operator are for starting weights, denoted as $a$ here. @@ -106,6 +107,7 @@ Finally, the linear chain CRF operator outputs the logarithm of the conditional likelihood of each training sample in a mini-batch. NOTE: + 1. The feature function for a CRF is made up of the emission features and the transition features. The emission feature weights are NOT computed in this operator. They MUST be computed first before this operator is called. diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index 4d12278799..56e39649b4 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -21,14 +21,14 @@ limitations under the License. */ #include "paddle/fluid/operators/detail/macros.h" -#include "paddle/fluid/operators/detail/request_handler_impl.h" +#include "paddle/fluid/operators/distributed/request_handler_impl.h" #include "paddle/fluid/operators/listen_and_serv_op.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { namespace operators { -void RunServer(std::shared_ptr service) { +void RunServer(std::shared_ptr service) { service->StartServer(); VLOG(4) << "RunServer thread end"; } @@ -99,19 +99,19 @@ static int64_t GetTimestamp() { void ListenAndServOp::RunSyncLoop( framework::Executor *executor, framework::ProgramDesc *program, framework::Scope *recv_scope, - const std::vector &prefetch_block_id_list) const { + const std::vector &prefetch_block_id_list, + const int checkpoint_point_block_id) const { size_t num_blocks = program->Size(); + auto optimize_blocks = + Attr>(kOptimizeBlocks); PADDLE_ENFORCE_GE(num_blocks, 2, "server program should have at least 2 blocks"); - std::vector optimize_block_id_list; - for (int blkid = 1; blkid < num_blocks; ++blkid) { - if (std::find(prefetch_block_id_list.begin(), prefetch_block_id_list.end(), - blkid) == prefetch_block_id_list.end()) { - optimize_block_id_list.push_back(blkid); - } + std::vector optimize_blocks_idx; + for (auto blk : optimize_blocks) { + optimize_blocks_idx.push_back(blk->ID()); } - auto optimize_prepared = executor->Prepare(*program, optimize_block_id_list); + auto optimize_prepared = executor->Prepare(*program, optimize_blocks_idx); // Insert placeholder for block0 which holds current op itself. optimize_prepared.insert( optimize_prepared.begin(), @@ -121,12 +121,12 @@ void ListenAndServOp::RunSyncLoop( while (true) { // Get from multiple trainers, we don't care about the order in which // the gradients arrives, just add suffix 0~n and merge the gradient. - rpc_service_->SetCond(detail::kRequestSend); - rpc_service_->WaitBarrier(detail::kRequestSend); + rpc_service_->SetCond(distributed::kRequestSend); + rpc_service_->WaitBarrier(distributed::kRequestSend); if (rpc_service_->IsExit()) { LOG(WARNING) << "get exit!rpc_processor break!"; - rpc_service_->SetCond(detail::kRequestGet); + rpc_service_->SetCond(distributed::kRequestGet); break; } @@ -134,14 +134,14 @@ void ListenAndServOp::RunSyncLoop( // and this will still work. // The optimize blocks which have the same parent ID would run parallel // TODO(Yancey1989): need to use ParallelExecutor for future - int32_t last_parent_blkid = program->Block(1).Parent(); + int32_t last_parent_blkid = optimize_blocks[0]->Parent(); std::vector parallel_blkids; - parallel_blkids.push_back(1); + parallel_blkids.push_back(optimize_blocks[0]->ID()); double ts = GetTimestamp(); - for (size_t i = 1; i < optimize_block_id_list.size(); ++i) { + for (size_t i = 1; i < optimize_blocks.size(); ++i) { // skip the first optimize block because it is already in the // parallel_blkids. - int blkid = optimize_block_id_list[i]; + int blkid = optimize_blocks[i]->ID(); if (program->Block(blkid).Parent() != last_parent_blkid) { ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, recv_scope); @@ -154,18 +154,18 @@ void ListenAndServOp::RunSyncLoop( recv_scope); VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)"; - rpc_service_->SetCond(detail::kRequestGet); - rpc_service_->WaitBarrier(detail::kRequestGet); + rpc_service_->SetCond(distributed::kRequestGet); + rpc_service_->WaitBarrier(distributed::kRequestGet); rpc_service_->ResetBarrierCounter(); // reset received sparse vars to avoid reuse it in the next mini-batch - dynamic_cast(request_send_handler_.get()) + dynamic_cast(request_send_handler_.get()) ->ResetSparseVarRecorder(); } // while(true) } void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, - framework::ProgramDesc *program) const { - VLOG(3) << "RunAsyncLoop in"; + framework::ProgramDesc *program, + framework::Scope *recv_scope) const { // grad name to block id std::unordered_map grad_to_block_id; std::unordered_map id_to_grad; @@ -192,6 +192,10 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, block_list.push_back(blkid); } auto optimize_prepared = executor->Prepare(*program, block_list); + // execute global block if needed + if (block_list[0] == 1 && id_to_grad.count(1) == 0) { + executor->RunPreparedContext(optimize_prepared[0].get(), recv_scope); + } std::unordered_map> grad_to_prepared_ctx; @@ -203,10 +207,9 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx); - VLOG(3) << "RunAsyncLoop into while"; while (true) { if (rpc_service_->IsExit()) { - LOG(INFO) << "get exit!rpc_processor break!"; + VLOG(4) << "get exit!rpc_processor break!"; break; } @@ -215,19 +218,21 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, } static void FillRequestCtx( - detail::RequestHandler *h, framework::Scope *scope, + distributed::RequestHandler *h, framework::Scope *scope, platform::DeviceContext *dev_ctx, framework::Executor *executor, framework::ProgramDesc *program, std::unordered_map> *prefetch_ctx, - detail::RPCServer *rpc_server) { + std::shared_ptr checkpoint_ctx, + distributed::RPCServer *rpc_server) { h->SetScope(scope); h->SetDevCtx(dev_ctx); h->SetExecutor(executor); h->SetProgram(program); h->SetPrefetchPreparedCtx(prefetch_ctx); h->SetRPCServer(rpc_server); + h->SetCheckpointNotifyPreparedCtx(checkpoint_ctx); } void ListenAndServOp::RunImpl(const framework::Scope &scope, @@ -243,26 +248,44 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, PADDLE_ENFORCE(!rpc_service_); std::string endpoint = Attr("endpoint"); + int checkpoint_block_id = Attr(kCheckpointBlockId); - LOG(INFO) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in - << ", end_point:" << endpoint; + VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in + << ", end_point:" << endpoint + << ", checkpoint_block_id: " << checkpoint_block_id; rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in)); - request_send_handler_.reset(new detail::RequestSendHandler(sync_mode)); - request_get_handler_.reset(new detail::RequestGetHandler(sync_mode)); + request_send_handler_.reset(new distributed::RequestSendHandler(sync_mode)); + request_get_handler_.reset(new distributed::RequestGetHandler(sync_mode)); request_prefetch_handler_.reset( - new detail::RequestPrefetchHandler(sync_mode)); - - rpc_service_->RegisterRPC(detail::kRequestSend, request_send_handler_.get()); - rpc_service_->RegisterRPC(detail::kRequestGet, request_get_handler_.get()); - rpc_service_->RegisterRPC(detail::kRequestPrefetch, + new distributed::RequestPrefetchHandler(sync_mode)); + request_checkpoint_handler_.reset(new distributed::RequestCheckpointHandler( + sync_mode, checkpoint_block_id)); + + rpc_service_->RegisterRPC(distributed::kRequestSend, + request_send_handler_.get()); + rpc_service_->RegisterRPC(distributed::kRequestGet, + request_get_handler_.get()); + rpc_service_->RegisterRPC(distributed::kRequestPrefetch, request_prefetch_handler_.get()); - - auto *optimize_block = Attr(kOptimizeBlock); - auto *program = optimize_block->Program(); + rpc_service_->RegisterRPC(distributed::kRequestCheckpoint, + request_checkpoint_handler_.get()); + + auto optimize_blocks = + Attr>(kOptimizeBlocks); + PADDLE_ENFORCE(optimize_blocks.size() >= 1, + "optimize blocks should be 1 at least on the pserver side."); + auto *program = optimize_blocks[0]->Program(); framework::Executor executor(dev_place); + std::shared_ptr ckpt_pre_context = nullptr; + if (checkpoint_block_id != -1) { + auto ctx = executor.Prepare(*program, checkpoint_block_id); + // see: https://stackoverflow.com/a/14856553 + ckpt_pre_context = std::move(ctx); + } + // prepare for prefetch std::vector prefetch_block_id_list; std::unordered_map block_id_to_prefetch_var_name; @@ -293,13 +316,15 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, prefetch_var_name_to_prepared_ctx[prefetch_var_name] = prefetch_prepared[i]; } - auto f = std::bind(FillRequestCtx, std::placeholders::_1, &recv_scope, - &dev_ctx, &executor, program, - &prefetch_var_name_to_prepared_ctx, rpc_service_.get()); + auto f = + std::bind(FillRequestCtx, std::placeholders::_1, &recv_scope, &dev_ctx, + &executor, program, &prefetch_var_name_to_prepared_ctx, + ckpt_pre_context, rpc_service_.get()); f(request_send_handler_.get()); f(request_get_handler_.get()); f(request_prefetch_handler_.get()); + f(request_checkpoint_handler_.get()); // start the server listening after all member initialized. server_thread_.reset(new std::thread(RunServer, rpc_service_)); @@ -313,9 +338,10 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, // Write to a file of server selected port for python use. SavePort(); if (sync_mode) { - RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list); + RunSyncLoop(&executor, program, &recv_scope, prefetch_block_id_list, + checkpoint_block_id); } else { - RunAsyncLoop(&executor, program); + RunAsyncLoop(&executor, program, &recv_scope); } } @@ -337,18 +363,23 @@ class ListenAndServOpMaker : public framework::OpProtoAndCheckerMaker { "a map from grad name to it's optimize block id") .SetDefault({}); AddAttr("sync_mode", "if works at sync_mode or not").SetDefault(true); - AddAttr(kOptimizeBlock, - "BlockID to run on server side."); + AddAttr>( + kOptimizeBlocks, "Optimize blocks to run on server side.") + .SetDefault({}); AddAttr>(kPrefetchVarNameToBlockId, "prefetch blocks to run on server side.") .SetDefault({}); AddAttr("Fanin", "How many clients send to this server.") .SetDefault(1); + AddAttr(kCheckpointBlockId, + "BolckID to run save checkpoint on pserer.") + .SetDefault(-1); } }; void SignalHandler::StopAndExit(int signal_num) { - VLOG(3) << "Catch interrupt signal: " << signal_num << ", program will exit"; + // Do not use VLOG here for the device for printing maybe already released. + // exit will release interal allocated resoureces. exit(0); } diff --git a/paddle/fluid/operators/listen_and_serv_op.h b/paddle/fluid/operators/listen_and_serv_op.h index 46c3a19e20..978969cc51 100644 --- a/paddle/fluid/operators/listen_and_serv_op.h +++ b/paddle/fluid/operators/listen_and_serv_op.h @@ -24,16 +24,17 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/threadpool.h" -#include "paddle/fluid/operators/detail/request_handler.h" -#include "paddle/fluid/operators/detail/rpc_server.h" +#include "paddle/fluid/operators/distributed/request_handler.h" +#include "paddle/fluid/operators/distributed/rpc_server.h" namespace paddle { namespace operators { -constexpr char kOptimizeBlock[] = "OptimizeBlock"; +constexpr char kOptimizeBlocks[] = "optimize_blocks"; constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id"; +constexpr char kCheckpointBlockId[] = "checkpint_block_id"; -void RunServer(std::shared_ptr service); +void RunServer(std::shared_ptr service); class ListenAndServOp : public framework::OperatorBase { public: @@ -47,10 +48,12 @@ class ListenAndServOp : public framework::OperatorBase { void RunSyncLoop(framework::Executor* executor, framework::ProgramDesc* program, framework::Scope* recv_scope, - const std::vector& prefetch_block_id_list) const; + const std::vector& prefetch_block_id_list, + const int checkpoint_point_block_id) const; void RunAsyncLoop(framework::Executor* executor, - framework::ProgramDesc* program) const; + framework::ProgramDesc* program, + framework::Scope* recv_scope) const; void SavePort() const; @@ -62,10 +65,13 @@ class ListenAndServOp : public framework::OperatorBase { const platform::Place& dev_place) const override; protected: - mutable std::shared_ptr rpc_service_; - mutable std::shared_ptr request_send_handler_; - mutable std::shared_ptr request_get_handler_; - mutable std::shared_ptr request_prefetch_handler_; + mutable std::shared_ptr rpc_service_; + mutable std::shared_ptr request_send_handler_; + mutable std::shared_ptr request_get_handler_; + mutable std::shared_ptr + request_prefetch_handler_; + mutable std::shared_ptr + request_checkpoint_handler_; mutable std::shared_ptr server_thread_; }; diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index 8f4b504927..ac35cf0b89 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -34,6 +34,8 @@ class LoadOp : public framework::OperatorBase { auto *dev_ctx = platform::DeviceContextPool::Instance().Get(place); platform::RecordEvent record_event(Type(), dev_ctx); + // FIXME(yuyang18): We save variable to local file now, but we should change + // it to save an output stream. auto filename = Attr("file_path"); std::ifstream fin(filename); PADDLE_ENFORCE(static_cast(fin), "Cannot open file %s for load op", @@ -44,9 +46,25 @@ class LoadOp : public framework::OperatorBase { PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found", out_var_name); - auto *tensor = out_var->GetMutable(); + if (out_var->IsType()) { + LoadLodTensor(fin, place, out_var); + } else if (out_var->IsType()) { + LoadSelectedRows(fin, place, out_var); + } else { + PADDLE_ENFORCE( + false, + "Load only support LoDTensor and SelectedRows, %s has wrong type", + out_var_name); + } + } - DeserializeFromStream(fin, tensor, *dev_ctx); + void LoadLodTensor(std::istream &fin, const platform::Place &place, + framework::Variable *var) const { + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + auto *tensor = var->GetMutable(); + DeserializeFromStream(fin, tensor, dev_ctx); auto load_as_fp16 = Attr("load_as_fp16"); auto in_dtype = framework::ToDataType(tensor->type()); @@ -63,18 +81,27 @@ class LoadOp : public framework::OperatorBase { &fp16_tensor); // reset output tensor - out_var->Clear(); - tensor = out_var->GetMutable(); + var->Clear(); + tensor = var->GetMutable(); tensor->set_lod(fp16_tensor.lod()); tensor->ShareDataWith(fp16_tensor); } } + + void LoadSelectedRows(std::istream &fin, const platform::Place &place, + framework::Variable *var) const { + auto *selectedRows = var->GetMutable(); + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + framework::DeserializeFromStream(fin, selectedRows, dev_ctx); + } }; class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddOutput("Out", "The tensor need to be loaded"); + AddOutput("Out", "The LoDTensor / SelectedRows need to be loaded"); AddAttr( "load_as_fp16", "If true, the tensor will be first loaded and then " @@ -85,7 +112,9 @@ class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { R"(Variable will be loaded from "file_path")") .AddCustomChecker( [](const std::string &path) { return !path.empty(); }); - AddComment("Load operator will load a tensor variable from disk file."); + AddComment( + "Load operator will load a LoDTensor / SelectedRows variable from disk " + "file."); } }; } // namespace operators diff --git a/paddle/fluid/operators/logical_op.cc b/paddle/fluid/operators/logical_op.cc index db109f5cd0..26970db8d2 100644 --- a/paddle/fluid/operators/logical_op.cc +++ b/paddle/fluid/operators/logical_op.cc @@ -146,6 +146,6 @@ REGISTER_UNARY_LOGICAL_OP(logical_not, "$$Out = !X$$"); REGISTER_UNARY_LOGICAL_KERNEL(logical_not, CPU, paddle::operators::LogicalNotFunctor); REGISTER_BINARY_LOGICAL_OP(logical_xor, - "$$Out = (X || Y) \\, \\&\\& \\, !(X \\&\\& Y)$$"); + "$$Out = (X || Y) \\&\\& !(X \\&\\& Y)$$"); REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, CPU, paddle::operators::LogicalXorFunctor); diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index 4751e3e802..3225bf9bb6 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -184,34 +184,32 @@ Long-Short Term Memory (LSTM) Operator. The defalut implementation is diagonal/peephole connection (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows: -$$ -i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\ +$$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) $$ -f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\ +$$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) $$ -\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\ +$$ \\tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) $$ -o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\ +$$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) $$ -c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\ +$$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$ -h_t = o_t \odot act_h(c_t) -$$ +$$ h_t = o_t \\odot act_h(c_t) $$ -where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix -of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$ -are diagonal weight matrices for peephole connections. In our implementation, -we use vectors to reprenset these diagonal weight matrices. The b terms -denote bias vectors ($b_i$ is the input gate bias vector), $\sigma$ -is the non-line activations, such as logistic sigmoid function, and -$i, f, o$ and $c$ are the input gate, forget gate, output gate, -and cell activation vectors, respectively, all of which have the same size as -the cell output activation vector $h$. - -The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$ -are the cell input and cell output activation functions and `tanh` is usually -used for them. $\tilde{c_t}$ is also called candidate hidden state, -which is computed based on the current input and the previous hidden state. +- W terms denote weight matrices (e.g. $W_{xi}$ is the matrix + of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$ + are diagonal weight matrices for peephole connections. In our implementation, + we use vectors to reprenset these diagonal weight matrices. +- The b terms denote bias vectors ($b_i$ is the input gate bias vector). +- $\sigma$ is the non-line activations, such as logistic sigmoid function. +- $i, f, o$ and $c$ are the input gate, forget gate, output gate, + and cell activation vectors, respectively, all of which have the same size as + the cell output activation vector $h$. +- The $\odot$ is the element-wise product of the vectors. +- $act_g$ and $act_h$ are the cell input and cell output activation functions + and `tanh` is usually used for them. +- $\tilde{c_t}$ is also called candidate hidden state, + which is computed based on the current input and the previous hidden state. Set `use_peepholes` False to disable peephole connection. The formula is omitted here, please refer to the paper diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt index bc788ef5e9..d2b772d113 100644 --- a/paddle/fluid/operators/math/CMakeLists.txt +++ b/paddle/fluid/operators/math/CMakeLists.txt @@ -55,13 +55,13 @@ math_library(matrix_bit_code) math_library(unpooling) math_library(vol2col) -cc_test(math_function_test SRCS math_function_test.cc) +cc_test(math_function_test SRCS math_function_test.cc DEPS math_function) cc_test(selected_rows_functor_test SRCS selected_rows_functor_test.cc DEPS selected_rows_functor) cc_test(im2col_test SRCS im2col_test.cc DEPS im2col) cc_test(vol2col_test SRCS vol2col_test.cc DEPS vol2col) cc_test(sequence_padding_test SRCS sequence_padding_test.cc DEPS sequence_padding) if(WITH_GPU) - nv_test(math_function_gpu_test SRCS math_function_test.cu) - nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor) + nv_test(math_function_gpu_test SRCS math_function_test.cu DEPS math_function) + nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function) endif() cc_test(concat_test SRCS concat_test.cc DEPS concat) diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h index 6207d14ecd..9f6c1e5c35 100644 --- a/paddle/fluid/operators/math/blas.h +++ b/paddle/fluid/operators/math/blas.h @@ -18,49 +18,17 @@ #include "paddle/fluid/framework/tensor.h" #ifdef PADDLE_WITH_MKLML -#include -#include -#include -#include +#include "paddle/fluid/platform/dynload/mklml.h" #endif #ifdef PADDLE_USE_OPENBLAS #include -#ifdef LAPACK_FOUND -#include -#endif -#endif - -#ifndef LAPACK_FOUND -extern "C" { -#include // NOLINT -int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, - int* ipiv); -int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, - int* ipiv); -int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, - const int* ipiv); -int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, - const int* ipiv); -} #endif namespace paddle { namespace operators { namespace math { -static void SetNumThreads(int num_threads) { -#ifdef PADDLE_USE_OPENBLAS - int real_num_threads = num_threads > 1 ? num_threads : 1; - openblas_set_num_threads(real_num_threads); -#elif defined(PADDLE_WITH_MKLML) - int real_num_threads = num_threads > 1 ? num_threads : 1; - mkl_set_num_threads(real_num_threads); -#else - PADDLE_ENFORCE(false, "To be implemented."); -#endif -} - /** * Matrix Descriptor of a memory buffer. * diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index ae20406bc2..2ce94cfc93 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -22,61 +22,109 @@ namespace math { template struct CBlas; +#ifdef PADDLE_WITH_MKLML template <> struct CBlas { template static void GEMM(ARGS... args) { - cblas_sgemm(args...); + platform::dynload::cblas_sgemm(args...); } template static void AXPY(ARGS... args) { - cblas_saxpy(args...); + platform::dynload::cblas_saxpy(args...); + } + + template + static void VCOPY(ARGS... args) { + platform::dynload::cblas_scopy(args...); + } + + template + static void GEMV(ARGS... args) { + platform::dynload::cblas_sgemv(args...); + } + + template + static void GEMM_BATCH(ARGS... args) { + platform::dynload::cblas_sgemm_batch(args...); } -#ifdef PADDLE_WITH_MKLML template static void VADD(ARGS... args) { - vsAdd(args...); + platform::dynload::vsAdd(args...); + } +}; + +template <> +struct CBlas { + template + static void GEMM(ARGS... args) { + platform::dynload::cblas_dgemm(args...); + } + + template + static void AXPY(ARGS... args) { + platform::dynload::cblas_daxpy(args...); } -#endif template static void VCOPY(ARGS... args) { - cblas_scopy(args...); + platform::dynload::cblas_dcopy(args...); } template static void GEMV(ARGS... args) { - cblas_sgemv(args...); + platform::dynload::cblas_dgemv(args...); } -#ifdef PADDLE_WITH_MKLML template static void GEMM_BATCH(ARGS... args) { - cblas_sgemm_batch(args...); + platform::dynload::cblas_dgemm_batch(args...); + } + + template + static void VADD(ARGS... args) { + platform::dynload::vdAdd(args...); } -#endif }; +#else + template <> -struct CBlas { +struct CBlas { template static void GEMM(ARGS... args) { - cblas_dgemm(args...); + cblas_sgemm(args...); } template static void AXPY(ARGS... args) { - cblas_daxpy(args...); + cblas_saxpy(args...); } -#ifdef PADDLE_WITH_MKLML template - static void VADD(ARGS... args) { - vdAdd(args...); + static void VCOPY(ARGS... args) { + cblas_scopy(args...); + } + + template + static void GEMV(ARGS... args) { + cblas_sgemv(args...); + } +}; + +template <> +struct CBlas { + template + static void GEMM(ARGS... args) { + cblas_dgemm(args...); + } + + template + static void AXPY(ARGS... args) { + cblas_daxpy(args...); } -#endif template static void VCOPY(ARGS... args) { @@ -87,15 +135,8 @@ struct CBlas { static void GEMV(ARGS... args) { cblas_dgemv(args...); } - -#ifdef PADDLE_WITH_MKLML - template - static void GEMM_BATCH(ARGS... args) { - cblas_dgemm_batch(args...); - } -#endif }; - +#endif template <> struct CBlas { static void GEMM(...) { PADDLE_THROW("float16 GEMM not supported on CPU"); } diff --git a/paddle/fluid/operators/math/concat.cc b/paddle/fluid/operators/math/concat.cc index cc69212466..55c8a472ac 100644 --- a/paddle/fluid/operators/math/concat.cc +++ b/paddle/fluid/operators/math/concat.cc @@ -70,21 +70,23 @@ template class ConcatGradFunctor { public: void operator()(const platform::CPUDeviceContext& context, - const framework::Tensor& input, const int axis, - std::vector* outputs) { + const framework::Tensor& input, + const std::vector& ref_inputs, + const int axis, std::vector* outputs) { // TODO(zcd): Add input data validity checking - int num = outputs->size(); + size_t num = outputs->size(); int input_rows = 1; - auto dim_0 = outputs->at(0).dims(); + auto dim_0 = ref_inputs[0]->dims(); for (int i = 0; i < axis; ++i) { input_rows *= dim_0[i]; } + int input_cols = 0; std::vector output_cols(outputs->size()); - for (int i = 0; i < num; ++i) { - int t_cols = outputs->at(i).numel() / input_rows; + for (size_t i = 0; i < num; ++i) { + int t_cols = ref_inputs[i]->numel() / input_rows; input_cols += t_cols; output_cols[i] = t_cols; } @@ -94,11 +96,14 @@ class ConcatGradFunctor { for (int k = 0; k < input_rows; ++k) { const T* src_ptr = input.data() + k * input_cols; int col_idx = 0; - for (int j = 0; j < num; ++j) { + for (size_t j = 0; j < num; ++j) { int col_len = output_cols[j]; - T* dst_ptr = outputs->at(j).data() + k * col_len; - memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx, - sizeof(T) * col_len); + auto* out_tensor = outputs->at(j); + if (out_tensor != nullptr) { + T* dst_ptr = out_tensor->data() + k * col_len; + memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx, + sizeof(T) * col_len); + } col_idx += col_len; } } diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu index 4285d38dcd..5863d74fca 100644 --- a/paddle/fluid/operators/math/concat.cu +++ b/paddle/fluid/operators/math/concat.cu @@ -22,43 +22,24 @@ namespace paddle { namespace operators { namespace math { -template -__device__ T upper_bound(const T* first, T count, T val) { - const T* orig = first; - const T* it = nullptr; - T step = 0; - while (count > 0) { - it = first; - step = count / 2; - it += step; - if (!(val < *it)) { - first = ++it; - count -= step + 1; - } else { - count = step; - } - } - return first - orig; -} - template __global__ void KernelConcat(T** inputs, const int* input_cols, int col_size, const int output_rows, const int output_cols, T* output) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; - int segment = upper_bound(input_cols, col_size, tid_x) - 1; - - int curr_offset = input_cols[segment]; - int curr_segment = segment; + int curr_segment = 0; + int curr_offset = input_cols[0]; for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) { - T curr_col_offset; - while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) { + int curr_col_offset = input_cols[curr_segment + 1]; + while (curr_col_offset <= tid_x) { curr_offset = curr_col_offset; ++curr_segment; + curr_col_offset = input_cols[curr_segment + 1]; } int local_col = tid_x - curr_offset; int segment_width = curr_col_offset - curr_offset; + T* input_ptr = inputs[curr_segment]; int tid_y = blockIdx.y * blockDim.y + threadIdx.y; for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y) @@ -89,23 +70,25 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row, const int in_col, const int* out_cols, int out_cols_size, T** outputs_data) { int tid_x = blockIdx.x * blockDim.x + threadIdx.x; - int segment = upper_bound(out_cols, out_cols_size, tid_x) - 1; - int curr_offset = out_cols[segment]; - int curr_segment = segment; + int curr_segment = 0; + int curr_offset = out_cols[0]; for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) { - T curr_col_offset; - while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) { + int curr_col_offset = out_cols[curr_segment + 1]; + while (curr_col_offset <= tid_x) { curr_offset = curr_col_offset; ++curr_segment; + curr_col_offset = out_cols[curr_segment + 1]; } int local_col = tid_x - curr_offset; int segment_width = curr_col_offset - curr_offset; T* output_ptr = outputs_data[curr_segment]; - int tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * segment_width + local_col] = - input_data[tid_y * in_col + tid_x]; + if (output_ptr != nullptr) { + int tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * segment_width + local_col] = + input_data[tid_y * in_col + tid_x]; + } } } @@ -118,10 +101,12 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row, int split = tid_x / fixed_out_col; int in_offset = tid_x - split * fixed_out_col; T* output_ptr = outputs_data[split]; - int tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * fixed_out_col + in_offset] = - input_data[tid_y * in_col + tid_x]; + if (output_ptr != nullptr) { + int tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * fixed_out_col + in_offset] = + input_data[tid_y * in_col + tid_x]; + } } } @@ -203,17 +188,18 @@ template class ConcatGradFunctor { public: void operator()(const platform::CUDADeviceContext& context, - const framework::Tensor& input, const int axis, - std::vector* outputs) { + const framework::Tensor& input, + const std::vector& ref_inputs, + const int axis, std::vector* outputs) { // TODO(zcd): Add input data validity checking int o_num = outputs->size(); int out_row = 1; - auto dim_0 = outputs->at(0).dims(); + auto dim_0 = ref_inputs[0]->dims(); for (int i = 0; i < axis; ++i) { out_row *= dim_0[i]; } - int out_col = outputs->at(0).numel() / out_row; + int out0_col = ref_inputs[0]->numel() / out_row; int in_col = 0, in_row = out_row; bool sameShape = true; @@ -223,13 +209,17 @@ class ConcatGradFunctor { outputs_cols[0] = 0; for (int i = 0; i < o_num; ++i) { - int t_col = outputs->at(i).numel() / out_row; + int t_col = ref_inputs.at(i)->numel() / out_row; if (sameShape) { - if (t_col != out_col) sameShape = false; + if (t_col != out0_col) sameShape = false; } in_col += t_col; outputs_cols[i + 1] = in_col; - outputs_ptr[i] = outputs->at(i).data(); + if (outputs->at(i) != nullptr) { + outputs_ptr[i] = outputs->at(i)->data(); + } else { + outputs_ptr[i] = nullptr; + } } T** dev_out_gpu_data = @@ -255,7 +245,7 @@ class ConcatGradFunctor { if (sameShape) { KernelConcatGrad<<>>( - input.data(), in_row, in_col, out_col, dev_out_gpu_data); + input.data(), in_row, in_col, out0_col, dev_out_gpu_data); } else { const int* dev_outs_col_data = outputs_cols.CUDAData(context.GetPlace()); KernelConcatGrad<<>>( diff --git a/paddle/fluid/operators/math/concat.h b/paddle/fluid/operators/math/concat.h index 041ce8bf8a..9e080f2e8b 100644 --- a/paddle/fluid/operators/math/concat.h +++ b/paddle/fluid/operators/math/concat.h @@ -57,7 +57,8 @@ template class ConcatGradFunctor { public: void operator()(const DeviceContext& context, const framework::Tensor& input, - const int axis, std::vector* outputs); + const std::vector& ref_inputs, + const int axis, std::vector* outputs); }; } // namespace math diff --git a/paddle/fluid/operators/math/detail/avx_functions.cc b/paddle/fluid/operators/math/detail/avx_functions.cc index b95109d3f7..5641f91452 100644 --- a/paddle/fluid/operators/math/detail/avx_functions.cc +++ b/paddle/fluid/operators/math/detail/avx_functions.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "paddle/fluid/operators/math/detail/activation_functions.h" // TODO(qingqing) refine this dependence -#include "paddle/cuda/src/avx_mathfun.h" +#include "paddle/legacy/cuda/src/avx_mathfun.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index d39154c6f8..c3387be6da 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -30,6 +30,7 @@ template struct SetConstant; template struct SetConstant; template struct SetConstant; template struct SetConstant; +template struct SetConstant; #define DEFINE_CPU_TRANS(RANK) \ template struct Transpose -#include -#include +#include "paddle/fluid/platform/dynload/mklml.h" #endif #ifdef PADDLE_USE_OPENBLAS #include -#ifdef LAPACK_FOUND -#include -#endif -#endif - -#ifndef LAPACK_FOUND -extern "C" { -#include // NOLINT -int LAPACKE_sgetrf(int matrix_layout, int m, int n, float* a, int lda, - int* ipiv); -int LAPACKE_dgetrf(int matrix_layout, int m, int n, double* a, int lda, - int* ipiv); -int LAPACKE_sgetri(int matrix_layout, int n, float* a, int lda, - const int* ipiv); -int LAPACKE_dgetri(int matrix_layout, int n, double* a, int lda, - const int* ipiv); -} #endif #include diff --git a/paddle/fluid/operators/mean_op.cc b/paddle/fluid/operators/mean_op.cc index 4881cff4a3..9e0bebd17c 100644 --- a/paddle/fluid/operators/mean_op.cc +++ b/paddle/fluid/operators/mean_op.cc @@ -33,12 +33,10 @@ class MeanOp : public framework::OperatorWithKernel { class MeanOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "The input of mean op"); - AddOutput("Out", "The output of mean op").Reuse("X"); + AddInput("X", "(Tensor) The input of mean op"); + AddOutput("Out", "(Tensor) The output of mean op").Reuse("X"); AddComment(R"DOC( -Mean Operator. - -Out is a scalar which is the mean of all elements in X. +Mean Operator calculates the mean of all elements in X. )DOC"); } diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index a16861b3b7..2dc1467b0d 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -44,8 +44,10 @@ class MergeLoDTensorOp : public framework::OperatorBase { scope.FindVar(Output("Out"))->GetMutable(); auto level = static_cast(Attr("level")); - auto &mask_dim = mask.dims(); + PADDLE_ENFORCE(in_true.numel() || in_false.numel(), + "Input(InTrue) or Input(InFalse) should be initialized."); + auto &mask_dim = mask.dims(); std::unique_ptr cpu_mask{new framework::LoDTensor()}; if (platform::is_cpu_place(mask.place())) { cpu_mask->ShareDataWith(mask); @@ -59,19 +61,27 @@ class MergeLoDTensorOp : public framework::OperatorBase { } auto *mask_data = cpu_mask->data(); - int rank = in_true.dims().size(); - platform::Place place = in_true.place(); - std::type_index data_type = in_true.type(); - framework::DDim in_true_dims = - framework::slice_ddim(in_true.dims(), 1, rank); - + platform::Place place = dev_place; int64_t batch_size = in_true.dims()[0] + in_false.dims()[0]; - auto in_true_dim_vec = framework::vectorize(in_true_dims); - in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size); + std::type_index data_type = + in_true.IsInitialized() ? in_true.type() : in_false.type(); + int rank; + framework::DDim in_dims; + if (in_true.IsInitialized()) { + rank = in_true.dims().size(); + in_dims = framework::slice_ddim(in_true.dims(), 1, rank); + } else { + rank = in_false.dims().size(); + in_dims = framework::slice_ddim(in_false.dims(), 1, rank); + } + + auto in_dim_vec = framework::vectorize(in_dims); + in_dim_vec.insert(in_dim_vec.begin(), batch_size); - framework::DDim out_dims = framework::make_ddim(in_true_dim_vec); + framework::DDim out_dims = framework::make_ddim(in_dim_vec); out->Resize(out_dims); + out->mutable_data(place, data_type); auto *out_lod = out->mutable_lod(); diff --git a/paddle/fluid/operators/multiplex_op.cc b/paddle/fluid/operators/multiplex_op.cc index a4363fd25d..18ad46cb5e 100644 --- a/paddle/fluid/operators/multiplex_op.cc +++ b/paddle/fluid/operators/multiplex_op.cc @@ -62,26 +62,46 @@ class MultiplexOp : public framework::OperatorWithKernel { class MultiplexOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("Ids", "The index tensor of multiplex operator."); - AddInput("X", "The candidate tensors of multiplex operator.") + AddInput("Ids", + "Tensor, index variable which is a 2-D tensor with shape " + "[M, 1] where M is the batch size."); + AddInput("X", + "A list of variables to gather from. All variables have the same " + "shape and the rank is at least 2.") .AsDuplicable(); AddOutput("Out", "The output tensor of multiplex operator."); AddComment(R"DOC( -Multiplex Operator. - -Multiplex multiple tensors according to the index provided by the index tensor. - -Ids: the index tensor. -X[0 : N - 1]: the candidate tensors for output (N >= 2). -For each index i from 0 to batchSize - 1, the output is the i-th row of the +Referring to the given index variable, this layer selects rows from the +input variables to construct a multiplex variable. Assuming that there are +:math:`m` input variables and :math:`I_i` represents the i-th input +variable and :math:`i` is in [0, :math:`m`). All input variables are +tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`]. +Please note that rank of the input tensor should be at least 2. Each input +variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`] +where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2` +* ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input +variable. The given index variable should be a 2-D tensor with shape +[:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable. +Then the output variable will be a tensor with shape [:math:`d_0`, +:math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D +matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th +row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`. + +* Ids: the index tensor. + +* X[0 : N - 1]: the candidate tensors for output (N >= 2). + +* For each index i from 0 to batchSize - 1, the output is the i-th row of the the (Ids[i])-th tensor. For i-th row of the output tensor: -$$y[i] = x_{k}[i]$$ +$$ +y[i] = x_{k}[i] +$$ -where `y` is the output tensor, `x_{k}` is the k-th input tensor, -and `k = Ids[i]`. +where $y$ is the output tensor, $x_{k}$ is the k-th input tensor, +and $k = Ids[i]$. )DOC"); } diff --git a/paddle/fluid/operators/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl_op_test.cu.cc index ef54d79fdf..d5fb7a12e5 100644 --- a/paddle/fluid/operators/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl_op_test.cu.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include // NOLINT #include -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" @@ -27,6 +26,7 @@ limitations under the License. */ #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" USE_NO_KERNEL_OP(ncclInit); diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 06092e680a..e471f04662 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { "user should avoid setting this attribute.") .SetDefault({}); AddComment(R"DOC( -Compute and return the noise-contrastive estimation training loss. -See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). +Compute and return the noise-contrastive estimation training loss. See +`Noise-contrastive estimation: A new estimation principle for unnormalized +statistical models + `_. By default this operator uses a uniform distribution for sampling. )DOC"); } diff --git a/paddle/fluid/operators/parallel_do_op.cc b/paddle/fluid/operators/parallel_do_op.cc index 1012640d5e..c9744db3d0 100644 --- a/paddle/fluid/operators/parallel_do_op.cc +++ b/paddle/fluid/operators/parallel_do_op.cc @@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, - framework::AttributeMap{}); + framework::AttributeMap{{"use_mkldnn", {false}}}); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]); sum_op->Run(*sub_scopes[0], places[0]); WaitOnPlace(places[0]); diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 6707cdded4..f8ad63690e 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -204,8 +204,6 @@ void Pool2dOpMaker::Make() { // TODO(dzhwinter): need to registered layout transform function AddComment(R"DOC( -Pool2d Operator. - The pooling2d operation calculates the output based on the input, pooling_type and ksize, strides, paddings parameters. Input(X) and output(Out) are in NCHW format, where N is batch size, C is the @@ -215,19 +213,28 @@ These two elements represent height and width, respectively. The input(X) size and output(Out) size may be different. Example: + Input: + X shape: $(N, C, H_{in}, W_{in})$ + Output: + Out shape: $(N, C, H_{out}, W_{out})$ + For ceil_mode = false: $$ - H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0])}{strides[0]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1])}{strides[1]} + 1 $$ For ceil_mode = true: $$ - H_{out} = \frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 \\ - W_{out} = \frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 + H_{out} = \\frac{(H_{in} - ksize[0] + 2 * paddings[0] + strides[0] - 1)}{strides[0]} + 1 + $$ + $$ + W_{out} = \\frac{(W_{in} - ksize[1] + 2 * paddings[1] + strides[1] - 1)}{strides[1]} + 1 $$ )DOC"); diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index f20f33bbeb..db0a1002f4 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/prefetch_op.cc b/paddle/fluid/operators/prefetch_op.cc index f71ba84b31..8734282fe4 100644 --- a/paddle/fluid/operators/prefetch_op.cc +++ b/paddle/fluid/operators/prefetch_op.cc @@ -41,8 +41,8 @@ class PrefetchOp : public framework::OperatorBase { platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); auto& ctx = *pool.Get(place); - detail::RPCClient* rpc_client = - detail::RPCClient::GetInstance(); + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); for (size_t i = 0; i < ins.size(); i++) { if (NeedSend(scope, ins[i])) { diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index db7634918a..cceac40295 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -16,6 +16,7 @@ #include #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/framework/variable.h" namespace paddle { @@ -62,7 +63,7 @@ struct Formater { } } void PrintDtype() { - if (dtype.hash_code() != typeid(const char).hash_code()) { + if (!framework::IsType(dtype)) { CLOG << "\tdtype: " << dtype.name() << std::endl; } } @@ -83,15 +84,15 @@ struct Formater { void PrintData(size_t size) { PADDLE_ENFORCE_NOT_NULL(data); // print float - if (dtype.hash_code() == typeid(const float).hash_code()) { + if (framework::IsType(dtype)) { Display(size); - } else if (dtype.hash_code() == typeid(const double).hash_code()) { + } else if (framework::IsType(dtype)) { Display(size); - } else if (dtype.hash_code() == typeid(const int).hash_code()) { + } else if (framework::IsType(dtype)) { Display(size); - } else if (dtype.hash_code() == typeid(const int64_t).hash_code()) { + } else if (framework::IsType(dtype)) { Display(size); - } else if (dtype.hash_code() == typeid(const bool).hash_code()) { + } else if (framework::IsType(dtype)) { Display(size); } else { CLOG << "\tdata: unprintable type: " << dtype.name() << std::endl; diff --git a/paddle/fluid/operators/random_crop_op.cc b/paddle/fluid/operators/random_crop_op.cc index 528a6e4a1b..123fa44fa3 100644 --- a/paddle/fluid/operators/random_crop_op.cc +++ b/paddle/fluid/operators/random_crop_op.cc @@ -37,6 +37,11 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("SeedOut", "The random seed after random cropping.") .AsIntermediate(); AddAttr>("shape", "The shape of a cropped instance."); + AddAttr("startup_seed", + "If the input 'Seed' is not initialized, the 'startup_seed' " + "will be used to replace it. Even so, the seed after random " + "crop will also be outputed to the 'SeedOut'.") + .SetDefault(0); AddComment(R"DOC( This operator takes a batch of instance, and do random cropping on each instance. It means that cropping positions differs on each instance, which is determined @@ -49,8 +54,6 @@ class RandomCropOpMaker : public framework::OpProtoAndCheckerMaker { class RandomCropOpInferShape : public framework::InferShapeBase { public: void operator()(framework::InferShapeContext* ctx) const override { - auto seed_dim = ctx->GetInputDim("Seed"); - PADDLE_ENFORCE(seed_dim.size() == 1 && seed_dim[0] == 1); auto shape = ctx->Attrs().Get>("shape"); auto x_dim = ctx->GetInputDim("X"); PADDLE_ENFORCE_GT(x_dim.size(), static_cast(shape.size())); @@ -62,7 +65,6 @@ class RandomCropOpInferShape : public framework::InferShapeBase { out_dim[x_i] = shape[shape_i]; } ctx->SetOutputDim("Out", framework::make_ddim(out_dim)); - ctx->SetOutputDim("SeedOut", framework::make_ddim({1})); } }; diff --git a/paddle/fluid/operators/random_crop_op.h b/paddle/fluid/operators/random_crop_op.h index f3261cbdc9..d68ba9d661 100644 --- a/paddle/fluid/operators/random_crop_op.h +++ b/paddle/fluid/operators/random_crop_op.h @@ -142,16 +142,22 @@ template class RandomCropKernel : public framework::OpKernel { public: virtual void Compute(const framework::ExecutionContext& ctx) const { - auto& seed_tensor = detail::Ref(ctx.Input("Seed")); int64_t seed = 0; - if (platform::is_cpu_place(seed_tensor.place())) { - seed = *seed_tensor.data(); + auto& seed_tensor = detail::Ref(ctx.Input("Seed")); + if (seed_tensor.IsInitialized()) { + if (platform::is_cpu_place(seed_tensor.place())) { + seed = *seed_tensor.data(); + } else { + LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify " + "your program"; + framework::LoDTensor cpu_seed; + framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed); + seed = *cpu_seed.data(); + } } else { - LOG(WARNING) << "It is slow to place seed in GPU memory. Please verify " - "your program"; - framework::LoDTensor cpu_seed; - framework::TensorCopySync(seed_tensor, platform::CPUPlace(), &cpu_seed); - seed = *cpu_seed.data(); + VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute " + "'startup_seed' instead."; + seed = ctx.Attr("startup_seed"); } auto shape = ctx.Attr>("shape"); auto& x = detail::Ref(ctx.Input("X")); @@ -171,7 +177,7 @@ class RandomCropKernel : public framework::OpKernel { engine.discard(functor.prod_batchsize_dims_ * (functor.rank_ - functor.num_batchsize_dims_)); *ctx.Output("SeedOut")->mutable_data( - platform::CPUPlace()) = engine(); + framework::make_ddim({1}), platform::CPUPlace()) = engine(); } }; diff --git a/paddle/fluid/operators/read_op.cc b/paddle/fluid/operators/read_op.cc index 72a27d4358..65fcce8bb0 100644 --- a/paddle/fluid/operators/read_op.cc +++ b/paddle/fluid/operators/read_op.cc @@ -66,9 +66,19 @@ class ReadOp : public framework::OperatorBase { std::vector out_arg_names = Outputs("Out"); std::vector ins; reader->ReadNext(&ins); - PADDLE_ENFORCE(!ins.empty(), "There is no next data."); + if (ins.empty()) { + if (Attr("throw_eof_exp")) { + PADDLE_THROW_EOF(); + } else { + ins.resize(out_arg_names.size()); + for (auto& tensor : ins) { + // data type is not important for subsequent DataBalanceOpHandle + tensor.mutable_data(framework::make_ddim({0}), dev_place); + } + } + } PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size()); - for (size_t i = 0; i < ins.size(); ++i) { + for (size_t i = 0; i < out_arg_names.size(); ++i) { auto* out = scope.FindVar(out_arg_names[i])->GetMutable(); out->ShareDataWith(ins[i]); @@ -82,6 +92,14 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("Reader", "(ReaderHolder) The executed reader."); AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable(); + AddAttr( + "throw_eof_exp", + "If set true, an exception will be thrown when the Reader " + "yields empty (which means there is no next data).\n" + "NOTES: This flag must be true always. It will be set to false" + " only when the data-balance is enabled in ParallelExecutor" + " and it is set by ParallelExecutor instance, not users.") + .SetDefault(true); AddComment(R"DOC( Read Operator diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt index 62532036f8..9dbcc35e6f 100644 --- a/paddle/fluid/operators/reader/CMakeLists.txt +++ b/paddle/fluid/operators/reader/CMakeLists.txt @@ -22,8 +22,8 @@ reader_library(create_batch_reader_op SRCS create_batch_reader_op.cc) reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_op.cc) reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc) reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc) -reader_library(create_threaded_reader_op SRCS create_threaded_reader_op.cc) reader_library(create_custom_reader_op SRCS create_custom_reader_op.cc) +reader_library(create_py_reader_op SRCS create_py_reader_op.cc) cc_test(reader_blocking_queue_test SRCS reader_blocking_queue_test.cc) # Export local libraries to parent diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h index 71684b1417..db8cf3b605 100644 --- a/paddle/fluid/operators/reader/blocking_queue.h +++ b/paddle/fluid/operators/reader/blocking_queue.h @@ -88,24 +88,29 @@ class BlockingQueue { receive_cv_.notify_all(); } - bool IsClosed() { + bool IsClosed() const { std::lock_guard lock(mutex_); return closed_; } - size_t Cap() { + size_t Cap() const { std::lock_guard lock(mutex_); return capacity_; } + size_t Size() const { + std::lock_guard lock(mutex_); + return queue_.size(); + } + private: size_t capacity_; bool closed_; std::deque queue_; - std::mutex mutex_; - std::condition_variable receive_cv_; - std::condition_variable send_cv_; + mutable std::mutex mutex_; + mutable std::condition_variable receive_cv_; + mutable std::condition_variable send_cv_; }; } // namespace reader } // namespace operators diff --git a/paddle/fluid/operators/reader/create_batch_reader_op.cc b/paddle/fluid/operators/reader/create_batch_reader_op.cc index ecbae3894d..1dbafd23e9 100644 --- a/paddle/fluid/operators/reader/create_batch_reader_op.cc +++ b/paddle/fluid/operators/reader/create_batch_reader_op.cc @@ -20,15 +20,19 @@ namespace reader { class BatchReader : public framework::DecoratedReader { public: - BatchReader(const std::shared_ptr& reader, int batch_size) - : DecoratedReader(reader), batch_size_(batch_size) { + BatchReader(const std::shared_ptr& reader, int batch_size, + bool discard_leftover) + : DecoratedReader(reader), + batch_size_(batch_size), + discard_leftover_(discard_leftover) { buffer_.reserve(batch_size_); } - void ReadNext(std::vector* out) override; + void ReadNextImpl(std::vector* out) override; private: int batch_size_; + bool discard_leftover_; std::vector> buffer_; }; @@ -46,8 +50,9 @@ class CreateBatchReaderOp : public framework::OperatorBase { } const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get(); - out->Reset( - new BatchReader(underlying_reader.Get(), Attr("batch_size"))); + out->Reset(framework::MakeDecoratedReader( + underlying_reader, Attr("batch_size"), + Attr("discard_leftover"))); } }; @@ -57,6 +62,10 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase { AddAttr("batch_size", "How many instances the batch reader yields each time.") .GreaterThan(0); + AddAttr("discard_leftover", + "If true, the leftover instances that are not enough for a " + "new batch will be discarded.") + .SetDefault(true); AddComment(R"DOC( CreateBatchReader Operator @@ -66,7 +75,7 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase { } }; -void BatchReader::ReadNext(std::vector* out) { +void BatchReader::ReadNextImpl(std::vector* out) { buffer_.clear(); buffer_.reserve(batch_size_); for (int i = 0; i < batch_size_; ++i) { @@ -77,6 +86,9 @@ void BatchReader::ReadNext(std::vector* out) { break; } } + if (discard_leftover_ && buffer_.size() < batch_size_) { + buffer_.clear(); + } // Concat instances out->clear(); if (buffer_.empty()) { diff --git a/paddle/fluid/operators/reader/create_custom_reader_op.cc b/paddle/fluid/operators/reader/create_custom_reader_op.cc index 0a02fcdeaa..85394b336f 100644 --- a/paddle/fluid/operators/reader/create_custom_reader_op.cc +++ b/paddle/fluid/operators/reader/create_custom_reader_op.cc @@ -33,12 +33,13 @@ class CustomReader : public framework::DecoratedReader { source_var_names_(source_var_names), sink_var_names_(sink_var_names) {} - void ReadNext(std::vector* out) override; + void ReadNextImpl(std::vector* out) override; private: const framework::ProgramDesc program_; int sub_block_id_; framework::Executor exe_; + framework::Scope scope_; std::vector source_var_names_; std::vector sink_var_names_; @@ -59,10 +60,10 @@ class CreateCustomReaderOp : public framework::OperatorBase { } const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get(); - out->Reset( - new CustomReader(underlying_reader.Get(), *sub_block, - Attr>("source_var_names"), - Attr>("sink_var_names"))); + out->Reset(framework::MakeDecoratedReader( + underlying_reader, *sub_block, + Attr>("source_var_names"), + Attr>("sink_var_names"))); } }; @@ -142,7 +143,7 @@ class CustomReaderInferVarType : public framework::VarTypeInference { } }; -void CustomReader::ReadNext(std::vector* out) { +void CustomReader::ReadNextImpl(std::vector* out) { out->clear(); std::vector underlying_outs; reader_->ReadNext(&underlying_outs); @@ -158,23 +159,24 @@ void CustomReader::ReadNext(std::vector* out) { // The scope for CustomReader's sub-block should be independent and shouldn't // be any other computation scope's child. Otherwise, data preprocessing and // compution cannot be concurrent. - framework::Scope scope; + framework::Scope* exe_scope = &scope_.NewScope(); // 1. Copy LoDTensors from underlying reader's output to source variables. for (size_t i = 0; i < source_var_names_.size(); ++i) { - framework::Variable* var = scope.Var(source_var_names_[i]); + framework::Variable* var = exe_scope->Var(source_var_names_[i]); framework::LoDTensor* tensor = var->GetMutable(); tensor->ShareDataWith(underlying_outs[i]); tensor->set_lod(underlying_outs[i].lod()); } // 2. Run the sub-block. - exe_.Run(program_, &scope, sub_block_id_, false, true); + exe_.Run(program_, exe_scope, sub_block_id_, false, true); // 3. Copy LoDTensors from sink variables to out. out->resize(sink_var_names_.size()); for (size_t i = 0; i < sink_var_names_.size(); ++i) { - const auto& tensor = detail::Ref(scope.FindVar(sink_var_names_[i])) + const auto& tensor = detail::Ref(exe_scope->FindVar(sink_var_names_[i])) .Get(); framework::TensorCopySync(tensor, platform::CPUPlace(), &(*out)[i]); } + scope_.DeleteScope(exe_scope); } } // namespace reader diff --git a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc index 5f35b9b3ea..7b14370f4f 100644 --- a/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc +++ b/paddle/fluid/operators/reader/create_double_buffer_reader_op.cc @@ -50,12 +50,21 @@ class DoubleBufferReader : public framework::DecoratedReader { StartPrefetcher(); } - void ReadNext(std::vector* out) override; - void ReInit() override; + void ReadNextImpl(std::vector* out) override; ~DoubleBufferReader() { EndPrefetcher(); } private: + void ShutdownImpl() override { + EndPrefetcher(); + reader_->Shutdown(); + } + + void StartImpl() override { + reader_->Start(); + StartPrefetcher(); + } + void StartPrefetcher() { channel_ = new reader::BlockingQueue(kChannelSize); prefetcher_ = std::thread([this] { PrefetchThreadFunc(); }); @@ -109,7 +118,8 @@ class CreateDoubleBufferReaderOp : public framework::OperatorBase { place = platform::CUDAPlace(static_cast(num)); } - out->Reset(new DoubleBufferReader(underlying_reader.Get(), place)); + out->Reset(framework::MakeDecoratedReader( + underlying_reader, place)); } }; @@ -136,7 +146,7 @@ class CreateDoubleBufferReaderOpMaker : public DecoratedReaderMakerBase { } }; -void DoubleBufferReader::ReadNext(std::vector* out) { +void DoubleBufferReader::ReadNextImpl(std::vector* out) { size_t cached_tensor_id; if (channel_->Receive(&cached_tensor_id)) { if (platform::is_gpu_place(place_)) { @@ -150,12 +160,6 @@ void DoubleBufferReader::ReadNext(std::vector* out) { } } -void DoubleBufferReader::ReInit() { - reader_->ReInit(); - EndPrefetcher(); - StartPrefetcher(); -} - void DoubleBufferReader::PrefetchThreadFunc() { VLOG(5) << "A new prefetch thread starts."; size_t cached_tensor_id = 0; diff --git a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc index 19b54110b9..0a225597d3 100644 --- a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc +++ b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc @@ -24,23 +24,22 @@ class MultiPassReader : public framework::DecoratedReader { MultiPassReader(const std::shared_ptr& reader, int pass_num) : DecoratedReader(reader), pass_num_(pass_num), pass_count_(0) {} - void ReadNext(std::vector* out) override { + void ReadNextImpl(std::vector* out) override { reader_->ReadNext(out); - if (out->empty()) { + if (out->empty() && pass_count_ < pass_num_ - 1) { + reader_->Shutdown(); + reader_->Start(); + reader_->ReadNext(out); ++pass_count_; - if (pass_count_ < pass_num_) { - reader_->ReInit(); - reader_->ReadNext(out); - } } } - void ReInit() override { + private: + void StartImpl() override { pass_count_ = 0; - reader_->ReInit(); + reader_->Start(); } - private: int pass_num_; mutable int pass_count_; }; @@ -60,7 +59,8 @@ class CreateMultiPassReaderOp : public framework::OperatorBase { const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get(); int pass_num = Attr("pass_num"); - out->Reset(new MultiPassReader(underlying_reader.Get(), pass_num)); + out->Reset(framework::MakeDecoratedReader( + underlying_reader, pass_num)); } }; diff --git a/paddle/fluid/operators/reader/create_py_reader_op.cc b/paddle/fluid/operators/reader/create_py_reader_op.cc new file mode 100644 index 0000000000..d411242799 --- /dev/null +++ b/paddle/fluid/operators/reader/create_py_reader_op.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" +#include "paddle/fluid/operators/reader/reader_op_registry.h" + +namespace paddle { +namespace operators { +namespace reader { + +class PyReader : public framework::FileReader { + public: + explicit PyReader(const std::shared_ptr& queue) + : framework::FileReader() { + PADDLE_ENFORCE(queue != nullptr, "LoDTensorBlockingQueue must not be null"); + queue_ = queue; + } + + void ReadNextImpl(std::vector* out) override { + bool success; + *out = queue_->Pop(&success); + if (!success) out->clear(); + } + + private: + void ShutdownImpl() override { /* TODO */ + } + + void StartImpl() override { /* TODO */ + } + + std::shared_ptr queue_; +}; + +class CreatePyReaderOp : public framework::OperatorBase { + public: + using framework::OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { + auto* out = scope.FindVar(Output("Out")) + ->template GetMutable(); + if (out->Get() != nullptr) return; + + const std::string& queue_name = Input("blocking_queue"); + auto* queue_holder_var = scope.FindVar(queue_name); + PADDLE_ENFORCE_NOT_NULL( + queue_holder_var, + "No LoDTensorBlockingQueueHolder variable with name %s found", + queue_name); + auto* queue_holder = + queue_holder_var->template GetMutable(); + + out->Reset(std::make_shared(queue_holder->GetQueue())); + } +}; + +class CreatePyReaderOpMaker : public FileReaderMakerBase { + protected: + void Apply() override { + AddInput("blocking_queue", + "Name of the `LoDTensorBlockingQueueHolder` variable"); + + AddComment(R"DOC( + Create PyReader to support LoDTensor data feeding in Python side. + )DOC"); + } +}; + +} // namespace reader +} // namespace operators +} // namespace paddle + +namespace reader = ::paddle::operators::reader; + +REGISTER_FILE_READER_OPERATOR(create_py_reader, reader::CreatePyReaderOp, + reader::CreatePyReaderOpMaker); diff --git a/paddle/fluid/operators/reader/create_random_data_generator_op.cc b/paddle/fluid/operators/reader/create_random_data_generator_op.cc index 5b7e8a063a..e5c116dfcd 100644 --- a/paddle/fluid/operators/reader/create_random_data_generator_op.cc +++ b/paddle/fluid/operators/reader/create_random_data_generator_op.cc @@ -19,11 +19,11 @@ namespace operators { namespace reader { template -class RandomDataGenerator : public framework::ReaderBase { +class RandomDataGenerator : public framework::FileReader { public: RandomDataGenerator(const std::vector& shapes, float low, float high) - : framework::ReaderBase(), low_(low), high_(high), shapes_(shapes) { + : framework::FileReader(), low_(low), high_(high), shapes_(shapes) { PADDLE_ENFORCE_LE(low, high, "'low' shouldn't be greater than 'high'.(%f vs %f)", low, high); @@ -32,7 +32,7 @@ class RandomDataGenerator : public framework::ReaderBase { dist_ = std::uniform_real_distribution(low_, high_); } - void ReadNext(std::vector* out) override { + void ReadNextImpl(std::vector* out) override { out->clear(); out->reserve(shapes_.size()); for (const framework::DDim& shape : shapes_) { @@ -51,8 +51,6 @@ class RandomDataGenerator : public framework::ReaderBase { } } - void ReInit() override { return; } - private: float low_; float high_; @@ -79,8 +77,8 @@ class CreateRandomDataGeneratorOp : public framework::OperatorBase { std::vector shapes = RestoreShapes(shape_concat, ranks); auto* out = scope.FindVar(Output("Out")) ->template GetMutable(); - out->Reset(new RandomDataGenerator(shapes, Attr("low"), - Attr("high"))); + out->Reset(std::make_shared>( + shapes, Attr("low"), Attr("high"))); } }; diff --git a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc index 282ec3f36b..b32f09b225 100644 --- a/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc +++ b/paddle/fluid/operators/reader/create_recordio_file_reader_op.cc @@ -21,10 +21,8 @@ namespace reader { template class RecordIOFileReader : public framework::FileReader { public: - explicit RecordIOFileReader(const std::string& filename, - const std::vector& dims) - : FileReader(dims), - scanner_(filename), + explicit RecordIOFileReader(const std::string& filename) + : scanner_(filename), dev_ctx_(*platform::DeviceContextPool::Instance().Get( platform::CPUPlace())) { if (ThreadSafe) { @@ -33,8 +31,6 @@ class RecordIOFileReader : public framework::FileReader { LOG(INFO) << "Creating file reader" << filename; } - void ReInit() override { scanner_.Reset(); } - protected: void ReadNextImpl(std::vector* out) override { if (ThreadSafe) { @@ -45,6 +41,8 @@ class RecordIOFileReader : public framework::FileReader { } } + void StartImpl() override { scanner_.Reset(); } + private: std::unique_ptr mutex_; recordio::Scanner scanner_; @@ -58,31 +56,26 @@ class CreateRecordIOReaderOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope& scope, const platform::Place& dev_place) const override { - const auto& shape_concat = Attr>("shape_concat"); - const auto& ranks = Attr>("ranks"); - PADDLE_ENFORCE(!shape_concat.empty() && !ranks.empty()); - PADDLE_ENFORCE_EQ(std::accumulate(ranks.begin(), ranks.end(), 0), - static_cast(shape_concat.size()), - "The accumulate of all ranks should be equal to the " - "shape concat's length."); std::string filename = Attr("filename"); - auto* out = scope.FindVar(Output("Out")) ->template GetMutable(); - out->Reset(new RecordIOFileReader( - filename, RestoreShapes(shape_concat, ranks))); + out->Reset(std::make_shared>(filename)); } }; class CreateRecordIOReaderOpMaker : public FileReaderMakerBase { protected: void Apply() override { - AddAttr("filename", "The filename of record io reader"); + AddAttr( + "filename", + "The filename of record file. This file will given to reader."); AddComment(R"DOC( - CreateRecordIOReader Operator +Open a recordio file and return the reader object. The returned reader object +is thread-safe. - Create a reader from a record io file +NOTE: This is a very low-level API. It is used for debugging data file or +training. Please use `open_files` instead of this API for production usage. )DOC"); } }; diff --git a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc index 57e8e21214..4b308abc29 100644 --- a/paddle/fluid/operators/reader/create_shuffle_reader_op.cc +++ b/paddle/fluid/operators/reader/create_shuffle_reader_op.cc @@ -34,7 +34,7 @@ class ShuffleReader : public framework::DecoratedReader { ReloadBuffer(); } - void ReadNext(std::vector* out) override { + void ReadNextImpl(std::vector* out) override { out->clear(); if (iteration_pos_ >= buffer_.size()) { VLOG(10) << "Resetting shuffle buffer"; @@ -47,6 +47,17 @@ class ShuffleReader : public framework::DecoratedReader { } private: + void ShutdownImpl() override { + buffer_.clear(); + iteration_pos_ = 0; + reader_->Shutdown(); + } + + void StartImpl() override { + reader_->Start(); + ReloadBuffer(); + } + void ReloadBuffer() { buffer_.clear(); buffer_.reserve(buffer_size_); @@ -86,9 +97,8 @@ class CreateShuffleReaderOp : public framework::OperatorBase { } const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) ->Get(); - out->Reset( - new ShuffleReader(underlying_reader.Get(), - static_cast(Attr("buffer_size")))); + out->Reset(framework::MakeDecoratedReader( + underlying_reader, static_cast(Attr("buffer_size")))); } }; diff --git a/paddle/fluid/operators/reader/create_threaded_reader_op.cc b/paddle/fluid/operators/reader/create_threaded_reader_op.cc deleted file mode 100644 index 3798015146..0000000000 --- a/paddle/fluid/operators/reader/create_threaded_reader_op.cc +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/operators/detail/safe_ref.h" -#include "paddle/fluid/operators/reader/reader_op_registry.h" - -namespace paddle { -namespace operators { -namespace reader { - -class ThreadedReader : public framework::DecoratedReader { - public: - explicit ThreadedReader(const std::shared_ptr& reader) - : DecoratedReader(reader) {} - - void ReadNext(std::vector* out) override { - std::lock_guard lock(mutex_); - reader_->ReadNext(out); - } - - void ReInit() override { reader_->ReInit(); } - - private: - std::mutex mutex_; -}; - -class CreateThreadedReaderOp : public framework::OperatorBase { - public: - using framework::OperatorBase::OperatorBase; - - private: - void RunImpl(const framework::Scope& scope, - const platform::Place& dev_place) const override { - auto* out = detail::Ref(scope.FindVar(Output("Out"))) - .GetMutable(); - if (out->Get() != nullptr) { - return; - } - const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) - ->Get(); - out->Reset(new ThreadedReader(underlying_reader.Get())); - } -}; - -class CreateThreadedReaderOpMaker : public DecoratedReaderMakerBase { - protected: - void Apply() override { - AddComment(R"DOC( - CreateThreadedReader Operator - - This operator creates a threaded reader. A threaded reader's - 'ReadNext()' can be invoked by several threads at the same - time. - When the attribute 'safe_mode' is true, the threaded reader's - 'ReInit()' is disabled to avoid unexpected bugs in multi-thread - environment. - )DOC"); - } -}; - -} // namespace reader -} // namespace operators -} // namespace paddle - -namespace reader = paddle::operators::reader; -REGISTER_DECORATED_READER_OPERATOR(create_threaded_reader, - reader::CreateThreadedReaderOp, - reader::CreateThreadedReaderOpMaker); diff --git a/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h new file mode 100644 index 0000000000..30d962ba10 --- /dev/null +++ b/paddle/fluid/operators/reader/lod_tensor_blocking_queue.h @@ -0,0 +1,103 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "paddle/fluid/framework/ddim.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/operators/reader/blocking_queue.h" +#include "paddle/fluid/platform/place.h" + +namespace paddle { +namespace operators { +namespace reader { + +class LoDTensorBlockingQueueHolder; + +class LoDTensorBlockingQueue { + friend class LoDTensorBlockingQueueHolder; + + private: + LoDTensorBlockingQueue(size_t capacity, + const std::vector& dims) + : queue_(capacity), dims_(dims) {} + + public: + bool Push(const std::vector& lod_tensor_vec) { + CheckDims(lod_tensor_vec); + return queue_.Send(lod_tensor_vec); + } + + bool Push(std::vector&& lod_tensor_vec) { + CheckDims(lod_tensor_vec); + return queue_.Send(std::move(lod_tensor_vec)); + } + + std::vector Pop(bool* ok = nullptr) { + std::vector lod_tensor_vec; + bool success = queue_.Receive(&lod_tensor_vec); + if (ok != nullptr) *ok = success; + return lod_tensor_vec; + } + + inline size_t Cap() const { return queue_.Cap(); } + + inline size_t Size() const { return queue_.Size(); } + + inline void Close() { return queue_.Close(); } + + inline bool IsClosed() const { return queue_.IsClosed(); } + + private: + void CheckDims(const std::vector& lod_tensor_vec) { + PADDLE_ENFORCE(dims_.size() == lod_tensor_vec.size(), + "Expect input size is %d but found %s", dims_.size(), + lod_tensor_vec.size()); + for (size_t i = 0; i < dims_.size(); ++i) { + const auto& in_dims = framework::slice_ddim( + lod_tensor_vec[i].dims(), 1, lod_tensor_vec[i].dims().size()); + const auto& expect_dims = + framework::slice_ddim(dims_[i], 1, dims_[i].size()); + PADDLE_ENFORCE(in_dims == expect_dims, + "Dims of the %d-th input tensor do not match", i); + } + } + + BlockingQueue> queue_; + std::vector dims_; +}; + +class LoDTensorBlockingQueueHolder { + public: + void InitOnce(size_t capacity, const std::vector& dims) { + PADDLE_ENFORCE( + queue_ == nullptr, + "LoDTensorBlockingQueueHolder::InitOnce() can only be called once"); + queue_.reset(new LoDTensorBlockingQueue(capacity, dims)); + } + + inline const std::shared_ptr& GetQueue() const { + return queue_; + } + + private: + std::shared_ptr queue_; +}; + +} // namespace reader +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reader/open_files_op.cc b/paddle/fluid/operators/reader/open_files_op.cc index 31e5d81e55..9a8d203672 100644 --- a/paddle/fluid/operators/reader/open_files_op.cc +++ b/paddle/fluid/operators/reader/open_files_op.cc @@ -23,24 +23,26 @@ namespace reader { class MultiFileReader : public framework::ReaderBase { public: - MultiFileReader(const std::vector& file_names, - const std::vector& dims, size_t thread_num, + MultiFileReader(const std::vector& file_names, size_t thread_num, size_t buffer_size) : buffer_size_(buffer_size) { readers_.reserve(file_names.size()); for (const std::string& f_name : file_names) { - readers_.emplace_back(CreateReaderByFileName(f_name, dims)); + readers_.emplace_back(CreateReaderByFileName(f_name)); } prefetchers_.resize(thread_num); StartNewScheduler(); } - void ReadNext(std::vector* out) override; - void ReInit() override; + void ReadNextImpl(std::vector* out) override; ~MultiFileReader() { EndScheduler(); } private: + void ShutdownImpl() override { EndScheduler(); } + + void StartImpl() override { StartNewScheduler(); } + void StartNewScheduler(); void EndScheduler(); void ScheduleThreadFunc(); @@ -55,17 +57,12 @@ class MultiFileReader : public framework::ReaderBase { reader::BlockingQueue>* buffer_; }; -void MultiFileReader::ReadNext(std::vector* out) { +void MultiFileReader::ReadNextImpl(std::vector* out) { if (!buffer_->Receive(out)) { out->clear(); } } -void MultiFileReader::ReInit() { - EndScheduler(); - StartNewScheduler(); -} - void MultiFileReader::StartNewScheduler() { size_t thread_num = prefetchers_.size(); waiting_reader_idx_ = new reader::BlockingQueue(readers_.size()); @@ -120,7 +117,7 @@ void MultiFileReader::ScheduleThreadFunc() { } } } - // If users invoke ReInit() when scheduler is running, it will close the + // If users invoke Shutdown() when scheduler is running, it will close the // 'avaiable_thread_idx_' and prefecther threads have no way to tell scheduler // to release their resource. So a check is needed before scheduler ends. for (auto& p : prefetchers_) { @@ -138,7 +135,8 @@ void MultiFileReader::PrefetchThreadFunc(size_t reader_idx, size_t thread_idx) { std::vector ins; reader->ReadNext(&ins); if (ins.empty()) { - reader->ReInit(); + reader->Shutdown(); + reader->Start(); break; } try { @@ -180,9 +178,8 @@ class OpenFilesOp : public framework::OperatorBase { auto* out = scope.FindVar(Output("Out")) ->template GetMutable(); - out->Reset(new MultiFileReader(file_names, - RestoreShapes(shape_concat, ranks), - thread_num, buffer_size)); + out->Reset( + std::make_shared(file_names, thread_num, buffer_size)); } }; diff --git a/paddle/fluid/operators/reader/reader_op_registry.cc b/paddle/fluid/operators/reader/reader_op_registry.cc index 612e1f5eca..b82aab1214 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.cc +++ b/paddle/fluid/operators/reader/reader_op_registry.cc @@ -39,7 +39,7 @@ std::unordered_map& FileReaderRegistry() { } std::unique_ptr CreateReaderByFileName( - const std::string& file_name, const std::vector& dims) { + const std::string& file_name) { size_t separator_pos = file_name.find_last_of(kFileFormatSeparator); PADDLE_ENFORCE_NE(separator_pos, std::string::npos, "File name illegal! A legal file name should be like: " @@ -49,12 +49,12 @@ std::unique_ptr CreateReaderByFileName( auto itor = FileReaderRegistry().find(filetype); PADDLE_ENFORCE(itor != FileReaderRegistry().end(), "No file reader registered for '%s' format.", filetype); - framework::ReaderBase* reader = (itor->second)(file_name, dims); + framework::ReaderBase* reader = (itor->second)(file_name); return std::unique_ptr(reader); } void FileReaderMakerBase::Make() { - AddOutput("Out", "(ReaderHolder) The created random reader.").AsDuplicable(); + AddOutput("Out", "(ReaderHolder): The created random reader.").AsDuplicable(); AddAttr>("shape_concat", "The concat of all data's shapes."); AddAttr>( "ranks", diff --git a/paddle/fluid/operators/reader/reader_op_registry.h b/paddle/fluid/operators/reader/reader_op_registry.h index 244bf15f06..25c3e7d77b 100644 --- a/paddle/fluid/operators/reader/reader_op_registry.h +++ b/paddle/fluid/operators/reader/reader_op_registry.h @@ -25,22 +25,21 @@ namespace reader { static constexpr char kFileFormatSeparator[] = "."; -using FileReaderCreator = std::function&)>; +using FileReaderCreator = + std::function; std::unordered_map& FileReaderRegistry(); template int RegisterFileReader(const std::string& filetype) { - FileReaderRegistry()[filetype] = []( - const std::string& fn, const std::vector& dims) { - return new Reader(fn, dims); + FileReaderRegistry()[filetype] = [](const std::string& fn) { + return new Reader(fn); }; return 0; } std::unique_ptr CreateReaderByFileName( - const std::string& file_name, const std::vector& dims); + const std::string& file_name); extern std::vector RestoreShapes( const std::vector& shape_concat, const std::vector& ranks); diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc index 9c1cee7022..162bfcbb08 100644 --- a/paddle/fluid/operators/recurrent_op.cc +++ b/paddle/fluid/operators/recurrent_op.cc @@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase { auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, - {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); + {{"Out", {pg_names[param_id]}}}, + framework::AttributeMap{{"use_mkldnn", {false}}}); sum_op->Run(cur_scope, place); cur_scope.Rename(new_inside_name, inside_grad_name); diff --git a/paddle/fluid/operators/recv_op.cc b/paddle/fluid/operators/recv_op.cc index 15dfb5469b..9854a31f5b 100644 --- a/paddle/fluid/operators/recv_op.cc +++ b/paddle/fluid/operators/recv_op.cc @@ -43,8 +43,8 @@ class RecvOp : public framework::OperatorBase { // For profiling platform::RecordEvent record_event(Type(), &ctx); - detail::RPCClient* rpc_client = - detail::RPCClient::GetInstance(); + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); for (size_t i = 0; i < outs.size(); i++) { VLOG(3) << "getting " << outs[i] << " from " << epmap[i]; diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc index 7f743f577f..918f3be533 100644 --- a/paddle/fluid/operators/reshape_op.cc +++ b/paddle/fluid/operators/reshape_op.cc @@ -12,14 +12,108 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/reshape_op.h" - #include #include +#include "paddle/fluid/framework/op_registry.h" namespace paddle { namespace operators { +class ReshapeOp : public framework::OperatorWithKernel { + public: + ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorWithKernel(type, inputs, outputs, attrs) {} + + void InferShape(framework::InferShapeContext *ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("X"), + "Input(X) of ReshapeOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(Out) of ReshapeOp should not be null."); + + const std::vector &shape = ctx->Attrs().Get>("shape"); + PADDLE_ENFORCE(!shape.empty(), + "The shape information must be set by Attr(shape)."); + + if (ctx->HasInput("Shape") && ctx->IsRuntime()) { + // If true, set the shape of Output(Out) according to Input(Shape) in + // ReshapeKernel with ExecutionContext. Also check LoD in ReshapeKernel. + ctx->ShareLoD("X", /*->*/ "Out"); + return; + } + + auto x_dims = ctx->GetInputDim("X"); + auto out_dims = ValidateShape(shape, x_dims); + ctx->SetOutputDim("Out", out_dims); + if (x_dims[0] == out_dims[0]) { + // Only pass LoD when the first dimension of output and Input(X) + // are the same. + ctx->ShareLoD("X", /*->*/ "Out"); + } + } + + static framework::DDim ValidateShape(const std::vector shape, + const framework::DDim &in_dims) { + const int64_t in_size = framework::product(in_dims); + // only one dimension can be set to -1, whose size will be automatically + // infered. + const int64_t unk_dim_val = -1; + const int64_t copy_dim_val = 0; + + std::vector output_shape(shape.size(), 0); + int64_t capacity = 1; + int unk_dim_idx = -1; + for (size_t i = 0; i < shape.size(); ++i) { + if (shape[i] == unk_dim_val) { + PADDLE_ENFORCE( + unk_dim_idx == -1, + "Only one input dimension of Attr(shape) can be unknown."); + unk_dim_idx = i; + } else if (shape[i] == copy_dim_val) { + PADDLE_ENFORCE( + static_cast(i) < in_dims.size(), + "The index of dimension to copy from input shape must be less " + "than the size of input shape."); + } else { + PADDLE_ENFORCE( + shape[i] > 0, + "Each input dimension of Attr(shape) must not be negtive except " + "one unknown dimension."); + } + + capacity *= (shape[i] ? shape[i] : in_dims[i]); + output_shape[i] = + (shape[i] ? static_cast(shape[i]) : in_dims[i]); + } + + if (unk_dim_idx != -1) { + if (in_size > 0) { + // in_size < 0 and is un-determinate in compile time, skip the check, + // for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8], + // capacity = -24, in_size = -8, output_shape[0] = 0 + // the following check will fail. + output_shape[unk_dim_idx] = -in_size / capacity; + PADDLE_ENFORCE_EQ(output_shape[unk_dim_idx] * capacity, -in_size, + "Invalid shape is given."); + } else { + output_shape[unk_dim_idx] = -1; + } + } else { + PADDLE_ENFORCE_EQ(capacity, in_size, "Invalid shape is given."); + } + return framework::make_ddim(output_shape); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext &ctx) const override { + return framework::OpKernelType( + framework::ToDataType(ctx.Input("X")->type()), + ctx.device_context()); + } +}; + class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { @@ -107,19 +201,93 @@ class ReshapeGradOp : public framework::OperatorWithKernel { } }; +class ReshapeKernel { + public: + void operator()(const framework::ExecutionContext &ctx) const { + auto *out = ctx.Output("Out"); + auto *in = ctx.Input("X"); + + auto *shape_tensor = ctx.HasInput("Shape") + ? ctx.Input("Shape") + : nullptr; + + framework::DDim out_dims = out->dims(); + + if (shape_tensor) { + auto *shape_data = shape_tensor->data(); + framework::Tensor cpu_shape_tensor; + if (platform::is_gpu_place(ctx.GetPlace())) { + TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } + auto shape = + std::vector(shape_data, shape_data + shape_tensor->numel()); + out_dims = ReshapeOp::ValidateShape(shape, in->dims()); + } + if (!in->lod().empty()) { + PADDLE_ENFORCE_EQ( + out_dims[0], in->dims()[0], + "Reshape operator cannot reshape an input sequence batch " + "into an output sequence batch that has a different " + "number of time steps. Please consider using " + "sequence_reshape op."); + } + + bool inplace = ctx.Attr("inplace"); + out->Resize(out_dims); + if (!inplace) { + out->mutable_data(ctx.GetPlace(), in->type()); + framework::TensorCopySync(*in, ctx.GetPlace(), out); + out->Resize(out_dims); + } else { + out->ShareDataWith(*in); + out->Resize(out_dims); + } + } +}; + +class ReshapeGradKernel { + public: + void operator()(const framework::ExecutionContext &ctx) const { + auto *d_out = ctx.Input(framework::GradVarName("Out")); + auto *d_x = ctx.Output(framework::GradVarName("X")); + + d_x->mutable_data(ctx.GetPlace(), d_out->type()); + bool inplace = ctx.Attr("inplace"); + + auto in_dims = d_x->dims(); + if (!inplace) { + framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x); + ctx.device_context().Wait(); + d_x->Resize(in_dims); + } else { + d_x->ShareDataWith(*d_out); + d_x->Resize(in_dims); + } + } +}; + } // namespace operators } // namespace paddle namespace ops = paddle::operators; -using CPU = paddle::platform::CPUDeviceContext; REGISTER_OPERATOR(reshape, ops::ReshapeOp, ops::ReshapeOpMaker, paddle::framework::DefaultGradOpDescMaker); REGISTER_OPERATOR(reshape_grad, ops::ReshapeGradOp); -REGISTER_OP_CPU_KERNEL(reshape, ops::ReshapeKernel, - ops::ReshapeKernel, - ops::ReshapeKernel, - ops::ReshapeKernel); -REGISTER_OP_CPU_KERNEL(reshape_grad, ops::ReshapeGradKernel, - ops::ReshapeGradKernel, - ops::ReshapeGradKernel, - ops::ReshapeGradKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); + +#ifdef PADDLE_WITH_CUDA +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double, + ops::ReshapeKernel, int, ops::ReshapeKernel, + int64_t, ops::ReshapeKernel); +REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel, + double, ops::ReshapeGradKernel, int, + ops::ReshapeGradKernel, int64_t, + ops::ReshapeGradKernel); +#endif diff --git a/paddle/fluid/operators/reshape_op.cu b/paddle/fluid/operators/reshape_op.cu deleted file mode 100644 index c628c634e2..0000000000 --- a/paddle/fluid/operators/reshape_op.cu +++ /dev/null @@ -1,26 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/fluid/operators/reshape_op.h" -using CUDA = paddle::platform::CUDADeviceContext; - -REGISTER_OP_CUDA_KERNEL(reshape, paddle::operators::ReshapeKernel, - paddle::operators::ReshapeKernel, - paddle::operators::ReshapeKernel, - paddle::operators::ReshapeKernel); -REGISTER_OP_CUDA_KERNEL(reshape_grad, - paddle::operators::ReshapeGradKernel, - paddle::operators::ReshapeGradKernel, - paddle::operators::ReshapeGradKernel, - paddle::operators::ReshapeGradKernel); diff --git a/paddle/fluid/operators/reshape_op.h b/paddle/fluid/operators/reshape_op.h deleted file mode 100644 index 3dd8c7c11e..0000000000 --- a/paddle/fluid/operators/reshape_op.h +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include -#include - -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -class ReshapeOp : public framework::OperatorWithKernel { - public: - ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs, - const framework::VariableNameMap &outputs, - const framework::AttributeMap &attrs) - : OperatorWithKernel(type, inputs, outputs, attrs) {} - - void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInput("X"), - "Input(X) of ReshapeOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of ReshapeOp should not be null."); - - const std::vector &shape = ctx->Attrs().Get>("shape"); - PADDLE_ENFORCE(!shape.empty(), - "The shape information must be set by Attr(shape)."); - - if (ctx->HasInput("Shape") && ctx->IsRuntime()) { - // If true, set the shape of Output(Out) according to Input(Shape) in - // ReshapeKernel with ExecutionContext. Also check LoD in ReshapeKernel. - ctx->ShareLoD("X", /*->*/ "Out"); - return; - } - - auto x_dims = ctx->GetInputDim("X"); - auto out_dims = ValidateShape(shape, x_dims); - ctx->SetOutputDim("Out", out_dims); - if (x_dims[0] == out_dims[0]) { - // Only pass LoD when the first dimension of output and Input(X) - // are the same. - ctx->ShareLoD("X", /*->*/ "Out"); - } - } - - static framework::DDim ValidateShape(const std::vector shape, - const framework::DDim &in_dims) { - const int64_t in_size = framework::product(in_dims); - // only one dimension can be set to -1, whose size will be automatically - // infered. - const int64_t unk_dim_val = -1; - const int64_t copy_dim_val = 0; - - std::vector output_shape(shape.size(), 0); - int64_t capacity = 1; - int unk_dim_idx = -1; - for (size_t i = 0; i < shape.size(); ++i) { - if (shape[i] == unk_dim_val) { - PADDLE_ENFORCE( - unk_dim_idx == -1, - "Only one input dimension of Attr(shape) can be unknown."); - unk_dim_idx = i; - } else if (shape[i] == copy_dim_val) { - PADDLE_ENFORCE( - static_cast(i) < in_dims.size(), - "The index of dimension to copy from input shape must be less " - "than the size of input shape."); - } else { - PADDLE_ENFORCE( - shape[i] > 0, - "Each input dimension of Attr(shape) must not be negtive except " - "one unknown dimension."); - } - - capacity *= (shape[i] ? shape[i] : in_dims[i]); - output_shape[i] = - (shape[i] ? static_cast(shape[i]) : in_dims[i]); - } - - if (unk_dim_idx != -1) { - if (in_size > 0) { - // in_size < 0 and is un-determinate in compile time, skip the check, - // for example, in_dims = [-1, 8, 1, 1], shape = [-1, 3, 8], - // capacity = -24, in_size = -8, output_shape[0] = 0 - // the following check will fail. - output_shape[unk_dim_idx] = -in_size / capacity; - PADDLE_ENFORCE_EQ(output_shape[unk_dim_idx] * capacity, -in_size, - "Invalid shape is given."); - } else { - output_shape[unk_dim_idx] = -1; - } - } else { - PADDLE_ENFORCE_EQ(capacity, in_size, "Invalid shape is given."); - } - return framework::make_ddim(output_shape); - } - - protected: - framework::OpKernelType GetExpectedKernelType( - const framework::ExecutionContext &ctx) const override { - return framework::OpKernelType( - framework::ToDataType(ctx.Input("X")->type()), - ctx.device_context()); - } -}; - -template -class ReshapeKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const { - auto *out = ctx.Output("Out"); - auto *in = ctx.Input("X"); - - auto *shape_tensor = ctx.HasInput("Shape") - ? ctx.Input("Shape") - : nullptr; - - framework::DDim out_dims = out->dims(); - - if (shape_tensor) { - auto *shape_data = shape_tensor->data(); - framework::Tensor cpu_shape_tensor; - if (platform::is_gpu_place(ctx.GetPlace())) { - TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor); - shape_data = cpu_shape_tensor.data(); - } - auto shape = - std::vector(shape_data, shape_data + shape_tensor->numel()); - out_dims = ReshapeOp::ValidateShape(shape, in->dims()); - } - if (!in->lod().empty()) { - PADDLE_ENFORCE_EQ( - out_dims[0], in->dims()[0], - "Reshape operator cannot reshape an input sequence batch " - "into an output sequence batch that has a different " - "number of time steps. Please consider using " - "sequence_reshape op."); - } - - bool inplace = ctx.Attr("inplace"); - out->Resize(out_dims); - if (!inplace) { - out->mutable_data(ctx.GetPlace()); - framework::TensorCopySync(*in, ctx.GetPlace(), out); - out->Resize(out_dims); - } else { - out->ShareDataWith(*in); - out->Resize(out_dims); - } - } -}; - -template -class ReshapeGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const { - auto *d_out = ctx.Input(framework::GradVarName("Out")); - auto *d_x = ctx.Output(framework::GradVarName("X")); - - d_x->mutable_data(ctx.GetPlace()); - bool inplace = ctx.Attr("inplace"); - - auto in_dims = d_x->dims(); - if (!inplace) { - framework::TensorCopy(*d_out, ctx.GetPlace(), ctx.device_context(), d_x); - ctx.device_context().Wait(); - d_x->Resize(in_dims); - } else { - d_x->ShareDataWith(*d_out); - d_x->Resize(in_dims); - } - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index 293abb0ea4..d6d209d5de 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -139,7 +139,20 @@ class ROIPoolOpMaker : public framework::OpProtoAndCheckerMaker { "The pooled output width.") .SetDefault(1); AddComment(R"DOC( -ROIPool operator +**ROIPool Operator** + +Region of interest pooling (also known as RoI pooling) is to perform +is to perform max pooling on inputs of nonuniform sizes to obtain +fixed-size feature maps (e.g. 7*7). + +The operator has three steps: + +1. Dividing each region proposal into equal-sized sections with + the pooled_width and pooled_height + +2. Finding the largest value in each section + +3. Copying these max values to the output buffer ROI Pooling for Faster-RCNN. The link below is a further introduction: https://stackoverflow.com/questions/43430056/what-is-roi-layer-in-fast-rcnn diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 20f140f962..10b1b0c899 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -78,23 +78,23 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(LoDTensor), the input(X) is a LodTensor, which supports " + "the input(X) is a LodTensor, which supports " "variable time-length input sequences. The underlying tensor " "in this LoDTensor is a matrix with shape (T x N), where T " "is the total time steps in this mini-batch and N is the input " "data dimension."); AddInput("Filter", - "(Tensor), the input(Filter) is a learnable parameter. It " + "the input(Filter) is a learnable parameter. It " "is a 2-D tensor with shape (future_context x N), where, " "future_context is the future context length and N is the data " "dimension."); AddOutput("Out", - "(LoDTensor), the output(Out) is a LodTensor, which supports " + "the output(Out) is a LodTensor, which supports " "variable time-length input sequences. The underlying tensor " "in this LodTensor is a matrix with shape T x N, i.e., the " "same shape as X."); AddComment(R"DOC( -Row-convolution Operator. +:strong:`Row-convolution operator` The row convolution is called lookahead convolution. This operator was introduced in the following paper for DeepSpeech2: @@ -114,9 +114,23 @@ and a filter ($W$) of size $context \times d$, the output sequence is convolved as: $$ -out_{i, :} = \sum_{j=i}^{i + context} in_{j,:} \dot W_{i-j, :} +out_{i, :} = \\sum_{j=i}^{i + context} in_{j,:} \\cdot W_{i-j, :} $$ +In the above equation: + +* $Out_{i}$: The i-th row of output variable with shape [1, D]. + +* $\\tau$: Future context size. + +* $X_{j}$: The j-th row of input variable with shape [1, D]. + +* $W_{i-j}$: The (i-j)-th row of parameters with shape [1, D]. + +More details about row_conv please refer to +the design document +https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645 . + )DOC"); } }; diff --git a/paddle/fluid/operators/save_load_op_test.cc b/paddle/fluid/operators/save_load_op_test.cc index c4fcc61af4..ccaea0eef2 100644 --- a/paddle/fluid/operators/save_load_op_test.cc +++ b/paddle/fluid/operators/save_load_op_test.cc @@ -139,6 +139,7 @@ TEST(LoadFP16Op, CPU) { save_op->Run(scope, place); auto load_var = scope.Var("out_var"); + load_var->GetMutable(); auto load_op = paddle::framework::OpRegistry::CreateOp( "load", {}, {{"Out", {"out_var"}}}, attrs); load_op->Run(scope, place); diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index e6d27e2ded..201a51130d 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -22,11 +22,17 @@ limitations under the License. */ #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/selected_rows.h" +#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { namespace operators { +// define LOOKUP_TABLE_PATH for checkpoint notify to save lookup table variables +// to directory specified. +constexpr char LOOKUP_TABLE_PATH[] = "kLookupTablePath"; + // TODO(yuyang18): If the functions below are needed by other files, move them // to paddle::filesystem namespace. constexpr char kSEP = '/'; @@ -67,9 +73,27 @@ class SaveOp : public framework::OperatorBase { private: void RunImpl(const framework::Scope &scope, const platform::Place &place) const override { + auto iname = Input("X"); + auto *var = scope.FindVar(iname); + PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s for save_op", + iname); + + if (var->IsType()) { + SaveLodTensor(place, var); + } else if (var->IsType()) { + SaveSelectedRows(scope, place, var); + } else { + PADDLE_ENFORCE( + false, + "SaveOp only support LoDTensor and SelectedRows, %s has wrong type", + iname); + } + } + + void SaveLodTensor(const platform::Place &place, + framework::Variable *var) const { auto filename = Attr("file_path"); auto overwrite = Attr("overwrite"); - auto save_as_fp16 = Attr("save_as_fp16"); if (FileExists(filename) && !overwrite) { PADDLE_THROW("%s is existed, cannot save to it when overwrite=false", @@ -78,26 +102,19 @@ class SaveOp : public framework::OperatorBase { MkDirRecursively(DirName(filename).c_str()); - // FIXME(yuyang18): We save variable to local file now, but we should change - // it to save an output stream. - std::ofstream fout(filename); - PADDLE_ENFORCE(static_cast(fout), "Cannot open %s to write", - filename); - - auto iname = Input("X"); - auto *var = scope.FindVar(iname); - PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s for save_op", - iname); - - PADDLE_ENFORCE(var->IsType(), - "SaveOp only support LoDTensor, %s has wrong type", iname); - auto &tensor = var->Get(); // get device context from pool platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); + // FIXME(yuyang18): We save variable to local file now, but we should change + // it to save an output stream. + std::ofstream fout(filename); + PADDLE_ENFORCE(static_cast(fout), "Cannot open %s to write", + filename); + + auto save_as_fp16 = Attr("save_as_fp16"); auto in_dtype = framework::ToDataType(tensor.type()); auto out_dtype = save_as_fp16 ? framework::proto::VarType::FP16 : in_dtype; @@ -112,17 +129,43 @@ class SaveOp : public framework::OperatorBase { } else { framework::SerializeToStream(fout, tensor, dev_ctx); } + fout.close(); + } + + void SaveSelectedRows(const framework::Scope &scope, + const platform::Place &place, + framework::Variable *var) const { + auto *lt_var = scope.FindVar(LOOKUP_TABLE_PATH)->GetMutable(); + PADDLE_ENFORCE( + lt_var != nullptr, + "Can not find variable kLookupTablePath for SaveSelectedRows"); + std::string filename = lt_var->data(); + VLOG(4) << "SaveSelectedRows get File name: " << filename; + + auto &selectedRows = var->Get(); + + // get device context from pool + platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); + auto &dev_ctx = *pool.Get(place); + + // FIXME(yuyang18): We save variable to local file now, but we should change + // it to save an output stream. + std::ofstream fout(filename); + PADDLE_ENFORCE(static_cast(fout), "Cannot open %s to write", + filename); + framework::SerializeToStream(fout, selectedRows, dev_ctx); + fout.close(); } }; class SaveOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("X", "(Tensor ) Input tensor to be saved"); + AddInput("X", "(Tensor ) Input LoDTensor and SelectedRows to be saved"); AddComment(R"DOC( Save operator -This operator will serialize and write a tensor variable to file on disk. +This operator will serialize and write LoDTensor / SelectedRows variable to file on disk. )DOC"); AddAttr("overwrite", "(boolean, default true)" @@ -142,9 +185,26 @@ This operator will serialize and write a tensor variable to file on disk. } }; +class SaveOpVarTypeInference : public framework::VarTypeInference { + public: + void operator()(const framework::OpDesc &op_desc, + framework::BlockDesc *block) const override { + auto out_var_name = op_desc.Output(LOOKUP_TABLE_PATH).front(); + auto &out_var = block->FindRecursiveOrCreateVar(out_var_name); + auto var_type = framework::proto::VarType::RAW; + out_var.SetType(var_type); + } +}; + +class SaveOpShapeInference : public framework::InferShapeBase { + public: + void operator()(framework::InferShapeContext *ctx) const override {} +}; } // namespace operators } // namespace paddle namespace ops = paddle::operators; -REGISTER_OPERATOR(save, ops::SaveOp, ops::SaveOpProtoMaker); +REGISTER_OPERATOR(save, ops::SaveOp, paddle::framework::EmptyGradOpMaker, + ops::SaveOpProtoMaker, ops::SaveOpVarTypeInference, + ops::SaveOpShapeInference); diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index 4687e21e71..7f8822e400 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -41,13 +41,13 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(Tensor) Input tensor of scale operator."); AddOutput("Out", "(Tensor) Output tensor of scale operator."); AddComment(R"DOC( -Scale operator +**Scale operator** + +Multiply the input tensor with a float scalar to scale the input tensor. $$Out = scale*X$$ )DOC"); - AddAttr("scale", - "(float, default 1.0)" - "The scaling factor of the scale operator.") + AddAttr("scale", "The scaling factor of the scale operator.") .SetDefault(1.0); } }; diff --git a/paddle/fluid/operators/send_barrier_op.cc b/paddle/fluid/operators/send_barrier_op.cc index c6c975a23c..6b4572dccc 100644 --- a/paddle/fluid/operators/send_barrier_op.cc +++ b/paddle/fluid/operators/send_barrier_op.cc @@ -44,8 +44,8 @@ class SendBarrierOp : public framework::OperatorBase { // For profiling platform::RecordEvent record_event(Type(), &ctx); - detail::RPCClient* rpc_client = - detail::RPCClient::GetInstance(); + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); VLOG(3) << "SendBarrierOp sync_mode:" << sync_mode; diff --git a/paddle/fluid/operators/send_op.cc b/paddle/fluid/operators/send_op.cc index 84ec366253..0cac329aaf 100644 --- a/paddle/fluid/operators/send_op.cc +++ b/paddle/fluid/operators/send_op.cc @@ -45,8 +45,8 @@ class SendOp : public framework::OperatorBase { // For profiling platform::RecordEvent record_event(Type(), &ctx); - detail::RPCClient* rpc_client = - detail::RPCClient::GetInstance(); + distributed::RPCClient* rpc_client = + distributed::RPCClient::GetInstance(); for (size_t i = 0; i < ins.size(); i++) { if (NeedSend(scope, ins[i])) { diff --git a/paddle/fluid/operators/send_recv_op_test.cc b/paddle/fluid/operators/send_recv_op_test.cc index e550552b19..aee6180add 100644 --- a/paddle/fluid/operators/send_recv_op_test.cc +++ b/paddle/fluid/operators/send_recv_op_test.cc @@ -129,7 +129,10 @@ void StartServerNet(bool is_sparse, std::atomic *initialized) { // sub program run in listen_and_serv_op, for simple test we use sum f::ProgramDesc program; const auto &root_block = program.Block(0); + std::vector optimize_blocks; auto *optimize_block = program.AppendBlock(root_block); + optimize_blocks.push_back(optimize_block); + auto *prefetch_block = program.AppendBlock(root_block); // X for server side tensors, RX for received tensors, must be of same shape. AddOp("sum", {{"X", {"x0", "x1"}}}, {{"Out", {"Out"}}}, {}, optimize_block, @@ -139,7 +142,7 @@ void StartServerNet(bool is_sparse, std::atomic *initialized) { attrs.insert({"Fanin", 1}); attrs.insert({"ParamList", std::vector({"Out"})}); attrs.insert({"GradList", std::vector({"x1"})}); - attrs.insert({"OptimizeBlock", optimize_block}); + attrs.insert({"optimize_blocks", optimize_blocks}); attrs.insert({"PrefetchBlock", prefetch_block}); attrs.insert({"grad_to_block_id", std::vector({""})}); attrs.insert({"sync_mode", true}); diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h index d62c387c3e..39301e1ac0 100644 --- a/paddle/fluid/operators/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_expand_op.h @@ -151,9 +151,6 @@ struct SequenceExpandGradFunctor { const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ LoDTensor* dx) { - math::SetConstant set_zero; - set_zero(context, dx, static_cast(0)); - int dout_offset = 0; for (size_t i = 1; i < ref_lod.size(); ++i) { int repeat_num = ref_lod[i] - ref_lod[i - 1]; @@ -187,6 +184,10 @@ class SequenceExpandGradKernel : public framework::OpKernel { g_x->mutable_data(context.GetPlace()); g_x->set_lod(x->lod()); + auto& dev_ctx = context.template device_context(); + math::SetConstant set_zero; + set_zero(dev_ctx, g_x, static_cast(0)); + auto& y_lod = y->lod(); if (ref_level == -1) ref_level = y_lod.size() - 1; // just copy the gradient diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc index c75fce7959..b44d5f8980 100644 --- a/paddle/fluid/operators/shape_op.cc +++ b/paddle/fluid/operators/shape_op.cc @@ -36,10 +36,13 @@ class ShapeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("Input", "(Tensor), The input tensor."); - AddOutput("Out", "(Tensor), The shape of input tensor."); + AddOutput("Out", + "(Tensor), The shape of input tensor, the data type of the shape" + " is int64_t, will be on the same device with the input Tensor."); AddComment(R"DOC( -Shape Operator. -Get the shape of input tensor. +Shape Operator + +Get the shape of input tensor. Only support CPU input Tensor now. )DOC"); } }; diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index 135e2a6f7f..c3b0fe3209 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -113,14 +113,14 @@ The logistic loss is given as follows: $$loss = -Labels * \log(\sigma(X)) - (1 - Labels) * \log(1 - \sigma(X))$$ -We know that $$\sigma(X) = (1 / (1 + \exp(-X)))$$. By substituting this we get: +We know that $$\sigma(X) = \\frac{1}{1 + \exp(-X)}$$. By substituting this we get: $$loss = X - X * Labels + \log(1 + \exp(-X))$$ For stability and to prevent overflow of $$\exp(-X)$$ when X < 0, we reformulate the loss as follows: - $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-|X|))$$ + $$loss = \max(X, 0) - X * Labels + \log(1 + \exp(-\|X\|))$$ Both the input `X` and `Labels` can carry the LoD (Level of Details) information. However the output only shares the LoD with input `X`. diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index 61bb445e8b..4bd23d5941 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -95,23 +95,26 @@ of that dimension. If the value passed to start or end is larger than the n (the number of elements in this dimension), it represents n. For slicing to the end of a dimension with unknown size, it is recommended to pass in INT_MAX. If axes are omitted, they are set to [0, ..., ndim-1]. - - Example 1: - Given: - data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] - axes = [0, 1] - starts = [1, 0] - ends = [2, 3] - Then: - result = [ [5, 6, 7], ] - - Example 2: - Given: - data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] - starts = [0, 1] - ends = [-1, 1000] - Then: - result = [ [2, 3, 4], ] +Following examples will explain how slice works: + + .. code-block:: text + + Cast1: + Given: + data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] + axes = [0, 1] + starts = [1, 0] + ends = [2, 3] + Then: + result = [ [5, 6, 7], ] + + Cast2: + Given: + data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] + starts = [0, 1] + ends = [-1, 1000] + Then: + result = [ [2, 3, 4], ] )DOC"); } }; diff --git a/paddle/fluid/operators/softmax_mkldnn_op.cc b/paddle/fluid/operators/softmax_mkldnn_op.cc index 14b57b11fe..6668e6b9e9 100644 --- a/paddle/fluid/operators/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/softmax_mkldnn_op.cc @@ -27,8 +27,81 @@ using paddle::platform::MKLDNNMemDesc; using mkldnn::memory; // Note: paddle has also "memory" namespace using mkldnn::primitive; using mkldnn::softmax_forward; +using mkldnn::softmax_backward; using mkldnn::prop_kind; using mkldnn::stream; +using platform::to_void_cast; + +class SoftmaxMKLDNNHandler : public platform::MKLDNNHandler { + public: + SoftmaxMKLDNNHandler( + std::shared_ptr softmax_pd, + const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, + const std::string& base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key), + softmax_pd_(softmax_pd) {} + + SoftmaxMKLDNNHandler( + std::shared_ptr softmax_pd, + std::shared_ptr softmax_bwd_pd, + const platform::MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, + const std::string& base_key) + : platform::MKLDNNHandler(dev_ctx, engine, base_key), + softmax_pd_(softmax_pd), + softmax_bwd_pd_(softmax_bwd_pd) { + // If we are in Grad operatgor then update a key with BWD suffix to + // distinguish from FWD memory primitives + key_ += "-BWD"; + } + + std::shared_ptr AcquireSoftmax( + std::shared_ptr dst_memory_p, + std::shared_ptr src_memory_p) { + /*Generate key*/ + auto prim_key = key_ + "@softmax_p"; + + auto softmax_p = std::static_pointer_cast( + dev_ctx_.GetBlob(prim_key)); + PADDLE_ENFORCE((softmax_p != nullptr) || (is_reusing_ == false), + "Fail to find softmax primitive in device context"); + if (softmax_p == nullptr) { + softmax_p = std::make_shared( + *(softmax_pd_.get()), + *(static_cast(src_memory_p.get())), + *(static_cast(dst_memory_p.get()))); + dev_ctx_.SetBlob(prim_key, softmax_p); + } else { + is_reusing_ = true; + } + + return softmax_p; + } + + std::shared_ptr AcquireSoftmaxBackward( + std::shared_ptr dst_memory_p, + std::shared_ptr diff_dst_memory_p, + std::shared_ptr diff_src_memory_p) { + auto prim_key = key_ + "@softmax_bwd_p"; + auto softmax_bwd_p = std::static_pointer_cast( + dev_ctx_.GetBlob(prim_key)); + PADDLE_ENFORCE((softmax_bwd_p != nullptr) || (is_reusing_ == false), + "Fail to find softmax backward primitive in device context"); + if (softmax_bwd_p == nullptr) { + softmax_bwd_p = std::make_shared( + *softmax_bwd_pd_, *(dst_memory_p.get()), *(diff_dst_memory_p.get()), + *(diff_src_memory_p.get())); + dev_ctx_.SetBlob(prim_key, softmax_bwd_p); + } else { + is_reusing_ = true; + } + + return softmax_bwd_p; + } + + private: + std::shared_ptr softmax_pd_; + std::shared_ptr softmax_bwd_pd_; +}; template class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { @@ -54,56 +127,27 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { // Same memory descriptor to be used for input and output memory::dims softmax_tz = {src_tz[0], src_tz[1]}; // Generate keys for storing/retriving primitives for this operator - // TODO(jczaja): Each MKLDNN operator may have diffrent hashing function - auto gethash = [](memory::dims& operand_dims) { - return std::string(std::to_string(operand_dims[0]) + "-" + - std::to_string(operand_dims[1])); - }; - const std::string key = gethash(softmax_tz); - const std::string key_softmax_p = key + "@softmax_p"; - const std::string key_softmax_src_mem_p = key + "@softmax_src_mem_p"; - const std::string key_softmax_dst_mem_p = key + "@softmax_dst_mem_p"; - - std::shared_ptr softmax_p = dev_ctx.GetBlob(key_softmax_p); - if (softmax_p == nullptr) { - // Currently only NC data format is supported - auto softmax_md = - MKLDNNMemDesc({softmax_tz}, memory::f32, memory::format::nc); - // Normalization is made after innermost dimension eg. C out of NC - auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, - softmax_md, 1 /*dim: C*/); - // create memory primitives - auto softmax_src_memory_p = std::make_shared( - memory::primitive_desc{softmax_md, mkldnn_engine}, - static_cast(const_cast(input_data))); - dev_ctx.SetBlob(key_softmax_src_mem_p, softmax_src_memory_p); - auto softmax_dst_memory_p = std::make_shared( - memory::primitive_desc{softmax_md, mkldnn_engine}, - static_cast(output_data)); - dev_ctx.SetBlob(key_softmax_dst_mem_p, softmax_dst_memory_p); - - auto softmax_forward_pd = - std::make_shared(softmax_desc, - mkldnn_engine); - softmax_p = std::make_shared( - *(softmax_forward_pd.get()), - *(static_cast(softmax_src_memory_p.get())), - *(static_cast(softmax_dst_memory_p.get()))); - dev_ctx.SetBlob(key_softmax_p, softmax_p); - } else { - // Primitives already exist - auto src_memory_p = std::static_pointer_cast( - dev_ctx.GetBlob(key_softmax_src_mem_p)); - PADDLE_ENFORCE(src_memory_p != nullptr, - "Fail to find softmax src mem_p in device context"); - auto dst_memory_p = std::static_pointer_cast( - dev_ctx.GetBlob(key_softmax_dst_mem_p)); - PADDLE_ENFORCE(dst_memory_p != nullptr, - "Fail to find softmax dst mem_p in device context"); - src_memory_p->set_data_handle( - reinterpret_cast(const_cast(input_data))); - dst_memory_p->set_data_handle(output_data); - } + const std::string key = + platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Output("Out")); + const std::string key_softmax_pd = key + "@softmax_pd"; + + // Currently only NC data format is supported + auto softmax_md = MKLDNNMemDesc( + {softmax_tz}, platform::MKLDNNGetDataType(), memory::format::nc); + // Normalization is made after innermost dimension eg. C out of NC + auto softmax_desc = softmax_forward::desc(prop_kind::forward_scoring, + softmax_md, 1 /*dim: C*/); + auto softmax_pd = std::make_shared( + softmax_desc, mkldnn_engine); + dev_ctx.SetBlob(key_softmax_pd, softmax_pd); + + SoftmaxMKLDNNHandler handler(softmax_pd, dev_ctx, mkldnn_engine, key); + auto softmax_src_memory_p = + handler.AcquireSrcMemory(softmax_md, to_void_cast(input_data)); + auto softmax_dst_memory_p = + handler.AcquireDstMemory(softmax_md, to_void_cast(output_data)); + auto softmax_p = + handler.AcquireSoftmax(softmax_dst_memory_p, softmax_src_memory_p); std::vector pipeline{ *(static_cast(softmax_p.get()))}; @@ -120,6 +164,77 @@ class SoftmaxMKLDNNKernel : public paddle::framework::OpKernel { } }; +template +class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), + "It must use CPUPlace."); + + auto& dev_ctx = ctx.template device_context(); + auto mkldnn_engine = dev_ctx.GetEngine(); + const Tensor* output = ctx.Input("Out"); + const T* dst_data = output->data(); + + auto* dout = ctx.template Input(framework::GradVarName("Out")); + const auto* diff_dst_ptr = dout->template data(); + + auto* dx = + ctx.template Output(framework::GradVarName("X")); + T* diff_src_ptr = dx->template mutable_data(ctx.GetPlace()); + + std::vector dst_tz = paddle::framework::vectorize2int(output->dims()); + std::vector src_tz(dst_tz); + PADDLE_ENFORCE(output->dims().size() == 2UL, + "The input of softmax op must be a 2D matrix."); + // MKL-DNN does support softmax over selected axis. Having 2D Tensor, + // we will make normalization after final eg. axis: 1 + PADDLE_ENFORCE(((src_tz[0] == dst_tz[0]) && (src_tz[1] == dst_tz[1])), + "Softmax input and output dimensions should match"); + // Same memory descriptor to be used for input and output + memory::dims softmax_tz = {src_tz[0], src_tz[1]}; + // Currently only supports NC data format + // retrieve eltwise primitive desc from device context + const std::string key = + platform::MKLDNNHandler::GetHash(softmax_tz, ctx.op().Input("Out")); + const std::string key_softmax_pd = key + "@softmax_pd"; + + auto softmax_pd = + std::static_pointer_cast( + dev_ctx.GetBlob(key_softmax_pd)); + PADDLE_ENFORCE(softmax_pd != nullptr, + "Fail to find softmax_pd in device context"); + + // TODO(jczaja): Add layouts support when there is a need to do so + // Two dimensional softmax does support NC format + auto data_softmax_md = MKLDNNMemDesc( + {softmax_tz}, platform::MKLDNNGetDataType(), memory::format::nc); + auto diff_softmax_md = MKLDNNMemDesc( + {softmax_tz}, platform::MKLDNNGetDataType(), memory::format::nc); + // Normalization is made after innermost dimension eg. C out of NC + auto softmax_bwd_desc = + softmax_backward::desc(diff_softmax_md, data_softmax_md, 1 /* dim: C*/); + auto softmax_bwd_pd = + std::make_shared( + softmax_bwd_desc, mkldnn_engine, *softmax_pd); + + SoftmaxMKLDNNHandler handler(softmax_pd, softmax_bwd_pd, dev_ctx, + mkldnn_engine, key); + auto dst_memory_p = + handler.AcquireDstMemory(data_softmax_md, to_void_cast(dst_data)); + auto diff_dst_memory_p = handler.AcquireDiffDstMemory( + diff_softmax_md, to_void_cast(diff_dst_ptr)); + auto diff_src_memory_p = handler.AcquireDiffSrcMemory( + diff_softmax_md, to_void_cast(diff_src_ptr)); + + // Get primitve from device context + auto softmax_bwd_p = handler.AcquireSoftmaxBackward( + dst_memory_p, diff_dst_memory_p, diff_src_memory_p); + + std::vector pipeline{*softmax_bwd_p}; + stream(stream::kind::eager).submit(pipeline).wait(); + } +}; } // namespace operators } // namespace paddle @@ -127,3 +242,5 @@ namespace ops = paddle::operators; REGISTER_OP_KERNEL(softmax, MKLDNN, ::paddle::platform::CPUPlace, ops::SoftmaxMKLDNNKernel); +REGISTER_OP_KERNEL(softmax_grad, MKLDNN, ::paddle::platform::CPUPlace, + ops::SoftmaxMKLDNNGradKernel); diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index 847b3cbd1b..31a7458f63 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -145,16 +145,30 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel { const framework::ExecutionContext& ctx) const override { // choose cudnn kernel if the runtime supported. framework::LibraryType library_{framework::LibraryType::kPlain}; + std::string data_format = ctx.Attr("data_format"); + framework::DataLayout layout_ = framework::StringToDataLayout(data_format); #ifdef PADDLE_WITH_CUDA if (platform::CanCUDNNBeUsed(ctx)) { library_ = framework::LibraryType::kCUDNN; } #endif - std::string data_format = ctx.Attr("data_format"); - return framework::OpKernelType( - framework::ToDataType(ctx.Input("X")->type()), ctx.GetPlace(), - framework::StringToDataLayout(data_format), library_); +#ifdef PADDLE_WITH_MKLDNN + if (library_ == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library_ = framework::LibraryType::kMKLDNN; + layout_ = framework::DataLayout::kMKLDNN; + } +#endif + auto input_data_type = + framework::ToDataType(ctx.Input("X")->type()); + if (input_data_type == framework::proto::VarType::FP16) { + PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), + "float16 can only be used on GPU place"); + } + + return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_, + library_); } }; diff --git a/paddle/fluid/operators/sum_mkldnn_op.cc b/paddle/fluid/operators/sum_mkldnn_op.cc new file mode 100644 index 0000000000..f78d977760 --- /dev/null +++ b/paddle/fluid/operators/sum_mkldnn_op.cc @@ -0,0 +1,240 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/*Licensed under the Apache License, Version 2.0(the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "mkldnn.hpp" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/operators/math/selected_rows_functor.h" +#include "paddle/fluid/operators/sum_op.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/mkldnn_helper.h" + +namespace paddle { +namespace operators { + +using paddle::framework::Tensor; +using paddle::platform::MKLDNNDeviceContext; +using paddle::platform::CPUDeviceContext; +using framework::DataLayout; +using mkldnn::memory; +using mkldnn::primitive; +using mkldnn::stream; +using mkldnn::sum; +using mkldnn::reorder; +using platform::to_void_cast; + +template +class SumMKLDNNOpKernel : public paddle::framework::OpKernel { + public: + void Compute(const paddle::framework::ExecutionContext& ctx) const override { + PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()), + "It must use CPUPlace."); + auto& dev_ctx = ctx.template device_context(); + const auto& mkldnn_engine = dev_ctx.GetEngine(); + auto in_vars = ctx.MultiInputVar("X"); + + const int N = in_vars.size(); + auto out_var = ctx.OutputVar("Out"); + bool in_place = out_var == in_vars[0]; + + if (out_var->IsType()) { + LoDTensor* output = ctx.Output("Out"); + T* output_data = output->mutable_data(ctx.GetPlace()); + + std::vector dst_tz = framework::vectorize2int(output->dims()); + auto src_tz = dst_tz; + memory::format output_format{memory::format::format_undef}; + std::vector scales; + std::vector srcs_mpd; + std::vector srcs_mem; + + PADDLE_ENFORCE(in_vars[0]->IsType(), + "Input[0] must be LoDTensors"); + auto& input0 = in_vars[0]->Get(); + PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN && + input0.format() != memory::format::format_undef, + "Wrong layout/format for inputs[0]"); + + memory::format input_format = input0.format(); + + if (src_tz.size() == 1 && (input_format == memory::format::nchw || + input_format == memory::format::nhwc)) { + input_format = memory::format::x; + } + if (src_tz.size() == 2 && (input_format == memory::format::nchw || + input_format == memory::format::nhwc)) { + input_format = memory::format::nc; + } + + for (int i = in_place ? 1 : 0; i < N; i++) { + PADDLE_ENFORCE(in_vars[i]->IsType(), + "all inputs must be all LoDTensors"); + auto& input = in_vars[i]->Get(); + PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN && + input.format() != memory::format::format_undef, + "Wrong layout/format for inputs"); + + if (input.numel() == 0) { + continue; + } + + const T* input_data = input.data(); + + auto src_md = + memory::desc(src_tz, memory::data_type::f32, input_format); + auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine); + auto src_mem = memory(src_mpd, to_void_cast(input_data)); + srcs_mpd.push_back(src_mpd); + srcs_mem.push_back(src_mem); + scales.push_back(1.0); + } + + auto dst_md = + memory::desc(dst_tz, memory::data_type::f32, memory::format::any); + + auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd); + + std::shared_ptr dst_mem; + if (in_place) { + dst_mem.reset(new memory(sum_pd.dst_primitive_desc())); + } else { + dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data)); + } + std::vector inputs; + for (size_t i = 0; i < srcs_mem.size(); ++i) { + inputs.push_back(srcs_mem[i]); + } + + auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem); + output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd); + + primitive reorder_prim; + std::shared_ptr target_mem; + if (in_place) { + output_format = input_format; + target_mem.reset(new memory( + {{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine}, + output_data)); + reorder_prim = reorder(*dst_mem, *target_mem); + } + + std::vector pipeline; + pipeline.push_back(sum_prim); + if (in_place) pipeline.push_back(reorder_prim); + stream(stream::kind::eager).submit(pipeline).wait(); + + output->set_layout(DataLayout::kMKLDNN); + output->set_format(output_format); + } else if (out_var->IsType()) { + // TODO(@mozga-intel) Add MKLDNN SelectedRows support + std::unique_ptr in0; + if (in_place) { + // If is in_place, we store the input[0] to in0 + auto& in_sel0 = in_vars[0]->Get(); + auto& rows = in_sel0.rows(); + in0.reset(new framework::SelectedRows(rows, in_sel0.height())); + in0->mutable_value()->ShareDataWith(in_sel0.value()); + } + + auto get_selected_row = [&](size_t i) -> const SelectedRows& { + if (i == 0 && in0) { + return *in0.get(); + } else { + return in_vars[i]->Get(); + } + }; + auto* out = ctx.Output("Out"); + out->mutable_rows()->clear(); + auto* out_value = out->mutable_value(); + + // Runtime InferShape + size_t first_dim = 0; + for (int i = 0; i < N; i++) { + auto& sel_row = get_selected_row(i); + first_dim += sel_row.rows().size(); + } + auto in_dim = + framework::vectorize(get_selected_row(N - 1).value().dims()); + in_dim[0] = static_cast(first_dim); + + out_value->Resize(framework::make_ddim(in_dim)); + + // if all the input sparse vars are empty, no need to + // merge these vars. + if (first_dim == 0UL) { + return; + } + out_value->mutable_data(ctx.GetPlace()); + math::SelectedRowsAddTo functor; + int64_t offset = 0; + for (int i = 0; i < N; i++) { + auto& sel_row = get_selected_row(i); + if (sel_row.rows().size() == 0) { + continue; + } + PADDLE_ENFORCE_EQ(out->height(), sel_row.height()); + functor(ctx.template device_context(), sel_row, + offset, out); + offset += sel_row.value().numel(); + } + } else if (out_var->IsType()) { + // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support + auto& out_array = *out_var->GetMutable(); + for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) { + PADDLE_ENFORCE(in_vars[i]->IsType(), + "Only support all inputs are TensorArray"); + auto& in_array = in_vars[i]->Get(); + + for (size_t i = 0; i < in_array.size(); ++i) { + if (in_array[i].numel() != 0) { + if (i >= out_array.size()) { + out_array.resize(i + 1); + } + if (out_array[i].numel() == 0) { + framework::TensorCopy(in_array[i], in_array[i].place(), + ctx.device_context(), &out_array[i]); + out_array[i].set_lod(in_array[i].lod()); + } else { + PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod()); + auto in = EigenVector::Flatten(in_array[i]); + auto result = EigenVector::Flatten(out_array[i]); + result.device(*ctx.template device_context() + .eigen_device()) = result + in; + } + } + } + } + } else { + PADDLE_THROW("Unexpected branch, output variable type is %s", + out_var->Type().name()); + } + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace, + paddle::operators::SumMKLDNNOpKernel); diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 863baba9ea..fe7c7039c7 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -18,6 +18,10 @@ limitations under the License. */ #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/operators/detail/safe_ref.h" +#ifdef PADDLE_WITH_MKLDNN +#include "paddle/fluid/platform/mkldnn_helper.h" +#endif + namespace paddle { namespace operators { using framework::Tensor; @@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel { framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { auto x_vars = ctx.MultiInputVar("X"); + + framework::LibraryType library{framework::LibraryType::kPlain}; + framework::DataLayout layout{framework::DataLayout::kAnyLayout}; + +#ifdef PADDLE_WITH_MKLDNN + if (library == framework::LibraryType::kPlain && + platform::CanMKLDNNBeUsed(ctx)) { + library = framework::LibraryType::kMKLDNN; + layout = framework::DataLayout::kMKLDNN; + } +#endif + if (x_vars[0]->IsType()) { int dtype = -1; for (auto& x_var : x_vars) { @@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel { "Sum operator should have at least one tensor"); return framework::OpKernelType( - static_cast(dtype), - ctx.device_context()); + static_cast(dtype), ctx.GetPlace(), + layout, library); } else if (x_vars[0]->IsType()) { for (auto& var : x_vars) { auto& value = var->Get().value(); if (value.IsInitialized()) { return framework::OpKernelType(framework::ToDataType(value.type()), - ctx.device_context()); + ctx.device_context(), layout, library); } } // if input sparse vars are not initialized, use an default kernel type. return framework::OpKernelType(framework::proto::VarType::FP32, - ctx.device_context()); + ctx.device_context(), layout, library); } else if (x_vars[0]->IsType()) { for (auto& x_var : x_vars) { auto& array = x_var->Get(); for (auto& each : array) { if (each.numel() != 0) { return framework::OpKernelType(framework::ToDataType(each.type()), - ctx.device_context()); + ctx.device_context(), layout, + library); } } } @@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "(vector) The input tensors of sum operator.") .AsDuplicable(); AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X"); + AddAttr("use_mkldnn", + "(bool, default false) Only used in mkldnn kernel") + .SetDefault(false); AddComment(R"DOC( Sum operator. @@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference { framework::BlockDesc* block) const override { auto& inputs = op_desc.Input("X"); auto var_type = framework::proto::VarType::SELECTED_ROWS; - for (auto& name : op_desc.Input("X")) { VLOG(10) << name << " " << block->FindRecursiveOrCreateVar(name).GetType(); @@ -206,6 +225,7 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker, ops::SumOpVarTypeInference); + REGISTER_OP_CPU_KERNEL( sum, ops::SumKernel, ops::SumKernel, diff --git a/paddle/fluid/operators/tensor_array_read_write_op.cc b/paddle/fluid/operators/tensor_array_read_write_op.cc index c703d11eec..a2d44284e9 100644 --- a/paddle/fluid/operators/tensor_array_read_write_op.cc +++ b/paddle/fluid/operators/tensor_array_read_write_op.cc @@ -38,15 +38,14 @@ class WriteToArrayOp : public ArrayOp { << " to " << offset + 1; out->resize(offset + 1); } + auto *out_tensor = &out->at(offset); + out_tensor->set_lod(x_tensor.lod()); if (x_tensor.memory_size() > 0) { - auto *out_tensor = &out->at(offset); - platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance(); auto &dev_ctx = *pool.Get(place); TensorCopy(x_tensor, place, dev_ctx, out_tensor); - out_tensor->set_lod(x_tensor.lod()); } else { VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so " "nothing has been written to output array[" diff --git a/paddle/fluid/operators/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt_engine_op.cc index 4b1208c437..647cfc0a0a 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt_engine_op.cc @@ -14,11 +14,14 @@ #ifdef PADDLE_WITH_CUDA -#include "paddle/fluid/operators/tensorrt_engine_op.h" +#include +#include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/utils/singleton.h" +#include "paddle/fluid/operators/tensorrt_engine_op.h" namespace paddle { namespace operators { @@ -66,17 +69,25 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector &shape) { } // namespace template -void paddle::operators::TensorRTEngineKernel::Prepare( +void TensorRTEngineKernel::Prepare( const framework::ExecutionContext &context) const { VLOG(4) << "Prepare engine"; // Get the ProgramDesc and pass to convert. framework::proto::BlockDesc block_desc; block_desc.ParseFromString(context.Attr("subgraph")); - max_batch_ = context.Attr("max_batch"); + int max_batch = context.Attr("max_batch"); auto max_workspace = context.Attr("max_workspace"); - engine_ = Singleton::Global().Create( - max_batch_, max_workspace, &stream_); - engine_->InitNetwork(); + auto params = context.Attr>("parameters"); + std::unordered_set parameters; + for (const auto ¶m : params) { + parameters.insert(param); + } + + // TODO(Superjomn) replace this with a different stream + auto *engine = Singleton::Global().Create( + max_batch, max_workspace, nullptr /*engine hold its own stream*/, + context.Attr("engine_uniq_key")); + engine->InitNetwork(); framework::BlockDesc block(nullptr /*programdesc*/, &block_desc); // Add inputs @@ -87,24 +98,23 @@ void paddle::operators::TensorRTEngineKernel::Prepare( PADDLE_ENFORCE_EQ(var->GetType(), FluidDT::VarType_Type_LOD_TENSOR, "TensorRT engine only takes LoDTensor as input"); auto shape = var->GetShape(); - engine_->DeclareInput( + engine->DeclareInput( input, FluidDataType2TRT( var->Proto()->type().lod_tensor().tensor().data_type()), Vec2TRT_Dims(var->GetShape())); } - // TODO(Superjomn) parameters should be passed after analysised from outside. inference::Singleton::Global().ConvertBlock( - block_desc, {}, context.scope(), engine_); + block_desc, parameters, context.scope(), engine); // Add outputs VLOG(4) << "declare outputs"; for (auto &output : context.Outputs("Ys")) { VLOG(4) << "declare output " << output; - engine_->DeclareOutput(output); + engine->DeclareOutput(output); } - engine_->FreezeNetwork(); + engine->FreezeNetwork(); } class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { @@ -113,6 +123,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("Xs", "A list of inputs.").AsDuplicable(); AddOutput("Ys", "A list of outputs").AsDuplicable(); AddAttr("subgraph", "the subgraph."); + AddAttr("engine_uniq_key", "unique key for the TRT engine."); AddAttr("max_batch", "the maximum batch size."); AddAttr("max_workspace", "the maximum batch size."); AddComment("TensorRT engine operator."); diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index 4b089601ff..1602a913ae 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -16,6 +16,9 @@ #ifdef PADDLE_WITH_CUDA +#include +#include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/tensorrt/engine.h" @@ -23,6 +26,9 @@ namespace paddle { namespace operators { +using inference::Singleton; +using inference::tensorrt::TRT_EngineManager; + class TensorRTEngineOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -47,16 +53,19 @@ template class TensorRTEngineKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - if (!engine_) { + VLOG(4) << "TensorRTEngineKernel executing"; + auto engine_name = context.Attr("engine_uniq_key"); + if (!Singleton::Global().HasEngine(engine_name)) { Prepare(context); } + auto* engine = Singleton::Global().Get(engine_name); auto input_names = context.op().Inputs("Xs"); PADDLE_ENFORCE(!input_names.empty(), "should pass more than one inputs"); // Try to determine a batch_size auto& tensor0 = inference::analysis::GetFromScope( context.scope(), input_names.front()); int batch_size = tensor0.dims()[0]; - PADDLE_ENFORCE_LE(batch_size, max_batch_); + PADDLE_ENFORCE_LE(batch_size, context.Attr("max_batch")); // Convert input tensor from fluid to engine. for (const auto& x : context.Inputs("Xs")) { @@ -64,20 +73,20 @@ class TensorRTEngineKernel : public framework::OpKernel { auto& t = inference::analysis::GetFromScope( context.scope(), x); if (platform::is_cpu_place(t.place())) { - engine_->SetInputFromCPU(x, static_cast(t.data()), - t.memory_size()); + engine->SetInputFromCPU(x, static_cast(t.data()), + t.memory_size()); } else { - engine_->SetInputFromGPU(x, static_cast(t.data()), - t.memory_size()); + engine->SetInputFromGPU(x, static_cast(t.data()), + t.memory_size()); } } // Execute the engine. PADDLE_ENFORCE_GT(batch_size, 0); - engine_->Execute(batch_size); + engine->Execute(batch_size); // Convert output tensor from engine to fluid for (const auto& y : context.Outputs("Ys")) { // convert output and copy to fluid. - nvinfer1::ITensor* trt_t = engine_->GetITensor(y); + nvinfer1::ITensor* trt_t = engine->GetITensor(y); auto dims = trt_t->getDimensions(); // Use the output ITensor's dims to reshape the Fluid Tensor. std::vector ddim(dims.d, dims.d + dims.nbDims); @@ -89,27 +98,22 @@ class TensorRTEngineKernel : public framework::OpKernel { auto size = inference::analysis::AccuDims(dims.d, dims.nbDims); if (platform::is_cpu_place(fluid_t->place())) { // TODO(Superjomn) change this float to dtype size. - engine_->GetOutputInCPU( + engine->GetOutputInCPU( y, fluid_t->mutable_data(platform::CPUPlace()), size * sizeof(float)); } else { - engine_->GetOutputInGPU( + engine->GetOutputInGPU( y, fluid_t->mutable_data(platform::CUDAPlace()), size * sizeof(float)); } } - cudaStreamSynchronize(stream_); + cudaStreamSynchronize(*engine->stream()); } protected: // Build the engine. void Prepare(const framework::ExecutionContext& context) const; - - private: - mutable cudaStream_t stream_; - mutable inference::tensorrt::TensorRTEngine* engine_{nullptr}; - mutable int max_batch_{0}; }; } // namespace operators diff --git a/paddle/fluid/operators/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt_engine_op_test.cc index 6f383de259..82a16361e4 100644 --- a/paddle/fluid/operators/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt_engine_op_test.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" @@ -51,37 +52,10 @@ void AddTensorToBlockDesc(framework::proto::BlockDesc* block, *var = *desc.Proto(); } -template -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const T& data); - -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const std::string& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::STRING); - attr->set_s(data); -} -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const int& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::INT); - attr->set_i(data); -} -template <> -void SetAttr(framework::proto::OpDesc* op, const std::string& name, - const int64_t& data) { - auto* attr = op->add_attrs(); - attr->set_name(name); - attr->set_type(paddle::framework::proto::AttrType::LONG); - attr->set_l(data); -} - } // namespace +using inference::analysis::SetAttr; + TEST(TensorRTEngineOp, manual) { framework::ProgramDesc program; auto* block_ = program.Proto()->add_blocks(); @@ -123,11 +97,15 @@ TEST(TensorRTEngineOp, manual) { engine_op_desc.SetOutput("Ys", std::vector({"z0"})); SetAttr(engine_op_desc.Proto(), "subgraph", block_->SerializeAsString()); - SetAttr(engine_op_desc.Proto(), "max_batch", 30); + SetAttr(engine_op_desc.Proto(), "max_batch", 100); SetAttr(engine_op_desc.Proto(), "max_workspace", 1 << 10); + SetAttr(engine_op_desc.Proto(), "engine_uniq_key", "a_engine"); + SetAttr>(engine_op_desc.Proto(), "parameters", + std::vector({})); LOG(INFO) << "create engine op"; auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto()); + LOG(INFO) << "engine_op " << engine_op.get(); framework::Scope scope; platform::CPUPlace place; @@ -145,6 +123,87 @@ TEST(TensorRTEngineOp, manual) { engine_op->Run(scope, place); } +void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) { + framework::ProgramDesc program; + framework::Scope scope; + platform::CPUPlace place; + platform::CPUDeviceContext ctx(place); + + auto* block_ = program.Proto()->add_blocks(); + block_->set_idx(0); + block_->set_parent_idx(-1); + + using shape_t = std::vector; + + LOG(INFO) << "create block desc"; + framework::BlockDesc block_desc(&program, block_); + + auto AddFCLayer = [&](const std::string& x_name, const std::string& y_name, + const std::string& z_name, bool x_created, + const shape_t& x_shape, const shape_t& y_shape, + const shape_t& z_shape) { + LOG(INFO) << "create fc op"; + auto* fc = block_desc.AppendOp(); + fc->SetType("mul"); + fc->SetInput("X", std::vector({x_name})); + fc->SetInput("Y", std::vector({y_name})); + fc->SetOutput("Out", std::vector({z_name})); + + // Set inputs' variable shape in BlockDesc + if (!x_created) { + AddTensorToBlockDesc(block_, x_name, + std::vector({batch_size, input_dim, 1, 1})); + } + AddTensorToBlockDesc(block_, y_name, + std::vector({input_dim, output_dim})); + AddTensorToBlockDesc(block_, z_name, + std::vector({batch_size, output_dim})); + + // Prepare variables. + if (!x_created) { + CreateCPUTensor(&scope, x_name, std::vector(x_shape)); + } + CreateCPUTensor(&scope, y_name, std::vector(y_shape)); + CreateCPUTensor(&scope, z_name, std::vector(z_shape)); + + // It is wired, need to copy manually. + *block_->add_ops() = *fc->Proto(); + }; + + // Test with 4 layer FC + AddFCLayer("x0", "y0", "z0", false, {batch_size, input_dim}, + {input_dim, output_dim}, {batch_size, output_dim}); + AddFCLayer("z0", "y1", "z1", true, {}, {output_dim, output_dim}, + {batch_size, output_dim}); + AddFCLayer("z1", "y2", "z2", true, {}, {output_dim, output_dim}, + {batch_size, output_dim}); + AddFCLayer("z2", "y3", "z3", true, {}, {output_dim, output_dim}, + {batch_size, output_dim}); + + LOG(INFO) << "create tensorrt desc"; + framework::OpDesc engine_op_desc(nullptr); + engine_op_desc.SetType("tensorrt_engine"); + engine_op_desc.SetInput("Xs", std::vector({"x0"})); + engine_op_desc.SetOutput("Ys", std::vector({"z3"})); + + SetAttr(engine_op_desc.Proto(), "subgraph", + block_->SerializeAsString()); + SetAttr(engine_op_desc.Proto(), "max_batch", batch_size); + SetAttr(engine_op_desc.Proto(), "max_workspace", 2 << 10); + SetAttr>( + engine_op_desc.Proto(), "parameters", + std::vector({"y0", "y1", "y2", "y3"})); + SetAttr(engine_op_desc.Proto(), "engine_uniq_key", "b_engine"); + + auto engine_op = framework::OpRegistry::CreateOp(*engine_op_desc.Proto()); + + // Execute them. + engine_op->Run(scope, place); +} + +// Test with a larger FC layer. +TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); } + } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/test_send_nccl_id.cc b/paddle/fluid/operators/test_send_nccl_id.cc index 5015b10055..e2b7b6b8e4 100644 --- a/paddle/fluid/operators/test_send_nccl_id.cc +++ b/paddle/fluid/operators/test_send_nccl_id.cc @@ -21,7 +21,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/detail/macros.h" -#include "paddle/fluid/operators/detail/request_handler_impl.h" +#include "paddle/fluid/operators/distributed/request_handler_impl.h" #include "paddle/fluid/operators/listen_and_serv_op.h" #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" @@ -37,11 +37,11 @@ USE_NO_KERNEL_OP(listen_and_serv); namespace f = paddle::framework; namespace p = paddle::platform; namespace m = paddle::operators::math; -namespace detail = paddle::operators::detail; +namespace distributed = paddle::operators::distributed; namespace string = paddle::string; -std::unique_ptr g_rpc_service; -std::unique_ptr g_req_handler; +std::unique_ptr g_rpc_service; +std::unique_ptr g_req_handler; void StartServer() { f::Scope scope; @@ -57,14 +57,14 @@ void StartServer() { g_req_handler->SetProgram(&empty_program); g_req_handler->SetExecutor(&executor); - g_rpc_service->RegisterRPC(detail::kRequestSend, g_req_handler.get()); + g_rpc_service->RegisterRPC(distributed::kRequestSend, g_req_handler.get()); g_req_handler->SetRPCServer(g_rpc_service.get()); std::thread server_thread( - std::bind(&detail::RPCServer::StartServer, g_rpc_service.get())); + std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get())); - g_rpc_service->SetCond(detail::kRequestSend); - g_rpc_service->WaitBarrier(detail::kRequestSend); + g_rpc_service->SetCond(distributed::kRequestSend); + g_rpc_service->WaitBarrier(distributed::kRequestSend); LOG(INFO) << "got nccl id and stop server..."; g_rpc_service->ShutDown(); @@ -72,7 +72,7 @@ void StartServer() { } TEST(SendNcclId, RPCServer) { - g_req_handler.reset(new detail::RequestSendHandler(true)); + g_req_handler.reset(new distributed::RequestSendHandler(true)); g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1)); std::thread server_thread(StartServer); @@ -91,7 +91,8 @@ TEST(SendNcclId, RPCServer) { std::string ep = string::Sprintf("127.0.0.1:%d", port); - detail::RPCClient* client = detail::RPCClient::GetInstance(); + distributed::RPCClient* client = + distributed::RPCClient::GetInstance(); LOG(INFO) << "connect to server" << ep; client->AsyncSendVar(ep, dev_ctx, scope, NCCL_ID_VARNAME); diff --git a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc index 78fee77df8..75d6181749 100644 --- a/paddle/fluid/operators/uniform_random_batch_size_like_op.cc +++ b/paddle/fluid/operators/uniform_random_batch_size_like_op.cc @@ -35,10 +35,10 @@ class UniformRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { protected: void Apply() override { AddComment(R"DOC( -Uniform random operator +UniformRandomBatchSizeLike operator. This operator initializes a tensor with the same batch_size as the Input tensor - with random values sampled from a uniform distribution. +with random values sampled from a uniform distribution. )DOC"); AddAttr("min", diff --git a/paddle/fluid/operators/uniform_random_op.cc b/paddle/fluid/operators/uniform_random_op.cc index 137ea91cae..edd1baa4ac 100644 --- a/paddle/fluid/operators/uniform_random_op.cc +++ b/paddle/fluid/operators/uniform_random_op.cc @@ -86,32 +86,24 @@ class UniformRandomOp : public framework::OperatorWithKernel { class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddOutput("Out", "(Tensor) The output tensor of uniform random op"); + AddOutput("Out", "The output tensor of uniform random op"); AddComment(R"DOC( -Uniform random operator. - This operator initializes a tensor with random values sampled from a -uniform distribution. +uniform distribution. The random result is in set [min, max]. )DOC"); - AddAttr>("shape", - "(vector) The shape of the output tensor"); - AddAttr("min", - "(float, default -1.0) " - "Minimum value of uniform random") + AddAttr>("shape", "The shape of the output tensor"); + AddAttr("min", "Minimum value of uniform random. [default -1.0].") .SetDefault(-1.0f); - AddAttr("max", - "(float, default 1.0) " - "Maximun value of uniform random") + AddAttr("max", "Maximun value of uniform random. [default 1.0].") .SetDefault(1.0f); AddAttr("seed", - "(int, default 0) " "Random seed used for generating samples. " "0 means use a seed generated by the system." "Note that if seed is not 0, this operator will always " - "generate the same random numbers every time.") + "generate the same random numbers every time. [default 0].") .SetDefault(0); - AddAttr("dtype", "(int, default 5(FP32)) Output tensor data type") + AddAttr("dtype", "Output tensor data type. [default 5(FP32)].") .SetDefault(framework::proto::VarType::FP32); } }; diff --git a/paddle/fluid/operators/while_op.cc b/paddle/fluid/operators/while_op.cc index 175c3ac5d7..733157ea05 100644 --- a/paddle/fluid/operators/while_op.cc +++ b/paddle/fluid/operators/while_op.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/operators/detail/safe_ref.h" namespace paddle { @@ -135,15 +136,14 @@ class WhileGradOp : public framework::OperatorBase { auto &og_inside = detail::Ref(cur_scope.Var(inside_og_name), "Cannot find inside gradient %s", inside_og_name); - if (og_outside.Type().hash_code() == - typeid(framework::LoDTensor).hash_code()) { + if (framework::IsType(og_outside.Type())) { auto &outside_tensor = og_outside.Get(); auto &inside_tensor = detail::Ref(og_inside.GetMutable()); inside_tensor.set_lod(outside_tensor.lod()); inside_tensor.ShareDataWith(outside_tensor); - } else if (og_outside.Type().hash_code() == - typeid(framework::LoDTensorArray).hash_code()) { + } else if (framework::IsType( + og_outside.Type())) { auto &outside_array = og_outside.Get(); auto &inside_array = detail::Ref(og_inside.GetMutable()); @@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase { ->set_lod(inside_tensor.lod()); } } - auto new_inside_name = cur_scope.Rename(inside_grad_name); auto sum_op = framework::OpRegistry::CreateOp( "sum", {{"X", {pg_names[param_id], new_inside_name}}}, - {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{}); + {{"Out", {pg_names[param_id]}}}, + framework::AttributeMap{{"use_mkldnn", {false}}}); sum_op->Run(cur_scope, dev_place); cur_scope.Rename(new_inside_name, inside_grad_name); } diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index b29035bafd..20037d0764 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -28,6 +28,9 @@ cc_test(place_test SRCS place_test.cc DEPS place glog gflags) add_subdirectory(dynload) +cc_library(cpu_helper SRCS cpu_helper.cc DEPS cblas enforce) +cc_test(cpu_helper_test SRCS cpu_helper_test.cc DEPS cpu_helper) + IF(WITH_GPU) set(GPU_CTX_DEPS dynload_cuda dynamic_loader) ELSE() @@ -42,10 +45,12 @@ ENDIF() # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies -cc_library(device_context SRCS device_context.cc DEPS malloc - place eigen3 ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) +cc_library(device_context SRCS device_context.cc init.cc DEPS malloc + place eigen3 stringpiece cpu_helper ${GPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}) nv_test(device_context_test SRCS device_context_test.cu DEPS device_context gpu_info) +cc_test(init_test SRCS init_test.cc DEPS device_context) + nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) @@ -53,5 +58,5 @@ cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framewo cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) -nv_test(float16_gpu_test SRCS float16_test.cu) -cc_test(float16_test SRCS float16_test.cc) +nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) +cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) diff --git a/paddle/fluid/platform/cpu_helper.cc b/paddle/fluid/platform/cpu_helper.cc new file mode 100644 index 0000000000..77ecb17011 --- /dev/null +++ b/paddle/fluid/platform/cpu_helper.cc @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/cpu_helper.h" +#include "paddle/fluid/platform/enforce.h" + +#ifdef PADDLE_WITH_MKLML +#include "paddle/fluid/platform/dynload/mklml.h" +#endif + +#ifdef PADDLE_USE_OPENBLAS +#include +#endif + +namespace paddle { +namespace platform { + +void SetNumThreads(int num_threads) { +#ifdef PADDLE_USE_OPENBLAS + int real_num_threads = num_threads > 1 ? num_threads : 1; + openblas_set_num_threads(real_num_threads); +#elif defined(PADDLE_WITH_MKLML) + int real_num_threads = num_threads > 1 ? num_threads : 1; + platform::dynload::MKL_Set_Num_Threads(real_num_threads); +#else + PADDLE_ENFORCE(false, "To be implemented."); +#endif +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cpu_helper.h b/paddle/fluid/platform/cpu_helper.h new file mode 100644 index 0000000000..78fc392b63 --- /dev/null +++ b/paddle/fluid/platform/cpu_helper.h @@ -0,0 +1,26 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +namespace paddle { +namespace platform { + +//! Set the number of threads in use. +void SetNumThreads(int num_threads); + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/cpu_helper_test.cc b/paddle/fluid/platform/cpu_helper_test.cc new file mode 100644 index 0000000000..dc1b2b56cd --- /dev/null +++ b/paddle/fluid/platform/cpu_helper_test.cc @@ -0,0 +1,22 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/cpu_helper.h" + +#include "gtest/gtest.h" + +TEST(CpuHelper, SetNumThread) { + paddle::platform::SetNumThreads(1); + paddle::platform::SetNumThreads(4); +} diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index 40dc7c9a0b..f832d72b53 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -28,9 +28,15 @@ DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," "reserve the rest for page tables, etc"); -DEFINE_uint64( - initial_cpu_memory_in_mb, 500, - "Default initial 500MB of CPU memory for PaddlePaddle, in MD unit."); +DEFINE_uint64(initial_cpu_memory_in_mb, +#ifdef PADDLE_WITH_MKLDNN + /* Aligned with mozga-intel, MKLDNN need at least 5000 MB + * to obtain the best performance*/ + 5000, +#else + 500, +#endif + "Initial CPU memory for PaddlePaddle, in MD unit."); DEFINE_double( fraction_of_cuda_pinned_memory_to_use, 0.5, @@ -59,10 +65,7 @@ inline size_t CpuTotalPhysicalMemory() { size_t CpuMaxAllocSize() { // For distributed systems, it requires configuring and limiting // the fraction of memory to use. - return std::min( - static_cast(FLAGS_fraction_of_cpu_memory_to_use * - CpuTotalPhysicalMemory()), - static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); + return FLAGS_fraction_of_cpu_memory_to_use * CpuTotalPhysicalMemory(); } size_t CpuMinChunkSize() { @@ -71,8 +74,11 @@ size_t CpuMinChunkSize() { } size_t CpuMaxChunkSize() { - // Allow to allocate the maximum chunk size is roughly 3% of CPU memory. - return CpuMaxAllocSize() / 32; + // Allow to allocate the maximum chunk size is roughly 3% of CPU memory, + // or the initial_cpu_memory_in_mb. + return std::min( + static_cast(CpuMaxAllocSize() / 32), + static_cast(FLAGS_initial_cpu_memory_in_mb * 1 << 20)); } size_t CUDAPinnedMaxAllocSize() { diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 6c50ab2685..2cc26da013 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device_context.h" +#include #include #include #include @@ -35,7 +36,7 @@ DeviceContextPool::DeviceContextPool( const std::vector& places) { PADDLE_ENFORCE_GT(places.size(), 0); using PtrType = std::unique_ptr; - std::unordered_set set; + std::set set; for (auto& p : places) { set.insert(p); } diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 292ffef1ae..88e0383146 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -27,12 +27,12 @@ limitations under the License. */ #include #endif +#include +#include "glog/logging.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "unsupported/Eigen/CXX11/Tensor" -#include "glog/logging.h" - namespace paddle { namespace platform { @@ -201,9 +201,7 @@ class DeviceContextPool { private: static DeviceContextPool* pool; - std::unordered_map, PlaceHash> - device_contexts_; + std::map> device_contexts_; DISABLE_COPY_AND_ASSIGN(DeviceContextPool); }; diff --git a/paddle/fluid/platform/device_context_test.cu b/paddle/fluid/platform/device_context_test.cu index fa806aba6d..171d2979a0 100644 --- a/paddle/fluid/platform/device_context_test.cu +++ b/paddle/fluid/platform/device_context_test.cu @@ -69,19 +69,3 @@ TEST(Device, DeviceContextPool) { ASSERT_NE(dev_ctx, nullptr); } } - -int main(int argc, char** argv) { - std::vector places; - - places.emplace_back(paddle::platform::CPUPlace()); - int count = paddle::platform::GetCUDADeviceCount(); - for (int i = 0; i < count; ++i) { - places.emplace_back(paddle::platform::CUDAPlace(i)); - } - - VLOG(0) << " DeviceCount " << count; - paddle::platform::DeviceContextPool::Init(places); - - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 364c4901b2..9da787a407 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -1,14 +1,23 @@ cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags enforce) -list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc nccl.cc) +list(APPEND CUDA_SRCS cublas.cc cudnn.cc curand.cc) + +# There is no macOS version of NCCL. +if (NOT APPLE) + list(APPEND CUDA_SRCS nccl.cc) +endif() + if (TENSORRT_FOUND) list(APPEND CUDA_SRCS tensorrt.cc) endif() - configure_file(cupti_lib_path.h.in ${CMAKE_CURRENT_BINARY_DIR}/cupti_lib_path.h) if (CUPTI_FOUND) list(APPEND CUDA_SRCS cupti.cc) endif(CUPTI_FOUND) nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) +if (WITH_MKLML) + cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml) +endif() +# TODO(TJ): add iomp, mkldnn? diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 19c01dc5a9..93bf7c1351 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -36,8 +36,6 @@ DEFINE_string(cuda_dir, "", DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so."); -DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so."); - DEFINE_string(nccl_dir, "", "Specify path for loading nccl library, such as libcublas, " "libcurand. For instance, /usr/local/cuda/lib64. If default, " @@ -49,6 +47,8 @@ DEFINE_string( tensorrt_dir, "", "Specify path for loading tensorrt library, such as libnvinfer.so."); +DEFINE_string(mklml_dir, "", "Specify path for loading libmklml_intel.so."); + namespace paddle { namespace platform { namespace dynload { @@ -76,6 +76,7 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, VLOG(3) << "Try to find library: " << dso_path << " from default system path."; // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH + // and /usr/local/lib path void* dso_handle = dlopen(dso_path.c_str(), dynload_flags); // DYLD_LIBRARY_PATH is disabled after Mac OS 10.11 to @@ -97,6 +98,10 @@ static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, } #endif + if (nullptr == dso_handle) { + LOG(WARNING) << "Can not find library: " << dso_path + << ". Please try to add the lib path to LD_LIBRARY_PATH."; + } return dso_handle; } @@ -182,14 +187,6 @@ void* GetWarpCTCDsoHandle() { #endif } -void* GetLapackDsoHandle() { -#if defined(__APPLE__) || defined(__OSX__) - return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.dylib"); -#else - return GetDsoHandleFromSearchPath(FLAGS_lapack_dir, "liblapacke.so"); -#endif -} - void* GetNCCLDsoHandle() { #if defined(__APPLE__) || defined(__OSX__) return GetDsoHandleFromSearchPath(FLAGS_nccl_dir, "libnccl.dylib"); @@ -206,6 +203,14 @@ void* GetTensorRtDsoHandle() { #endif } +void* GetMKLMLDsoHandle() { +#if defined(__APPLE__) || defined(__OSX__) + return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.dylib"); +#else + return GetDsoHandleFromSearchPath(FLAGS_mklml_dir, "libmklml_intel.so"); +#endif +} + } // namespace dynload } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/dynload/dynamic_loader.h b/paddle/fluid/platform/dynload/dynamic_loader.h index 0de3559b60..84fd2ce998 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.h +++ b/paddle/fluid/platform/dynload/dynamic_loader.h @@ -23,9 +23,9 @@ void* GetCUDNNDsoHandle(); void* GetCUPTIDsoHandle(); void* GetCurandDsoHandle(); void* GetWarpCTCDsoHandle(); -void* GetLapackDsoHandle(); void* GetNCCLDsoHandle(); void* GetTensorRtDsoHandle(); +void* GetMKLMLDsoHandle(); } // namespace dynload } // namespace platform diff --git a/paddle/fluid/platform/dynload/mklml.cc b/paddle/fluid/platform/dynload/mklml.cc new file mode 100644 index 0000000000..0f61a5e09b --- /dev/null +++ b/paddle/fluid/platform/dynload/mklml.cc @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/dynload/mklml.h" + +namespace paddle { +namespace platform { +namespace dynload { + +std::once_flag mklml_dso_flag; +void* mklml_dso_handle = nullptr; + +#define DEFINE_WRAP(__name) DynLoad__##__name __name + +MKLML_ROUTINE_EACH(DEFINE_WRAP); + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h new file mode 100644 index 0000000000..17acefe8cd --- /dev/null +++ b/paddle/fluid/platform/dynload/mklml.h @@ -0,0 +1,71 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include // NOLINT +#include "paddle/fluid/platform/dynload/dynamic_loader.h" + +namespace paddle { +namespace platform { +namespace dynload { + +extern std::once_flag mklml_dso_flag; +extern void* mklml_dso_handle; + +/** + * The following macro definition can generate structs + * (for each function) to dynamic load mklml routine + * via operator overloading. + */ +#define DYNAMIC_LOAD_MKLML_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + auto operator()(Args... args) -> decltype(__name(args...)) { \ + using mklmlFunc = decltype(&::__name); \ + std::call_once(mklml_dso_flag, []() { \ + mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \ + }); \ + static void* p_##_name = dlsym(mklml_dso_handle, #__name); \ + return reinterpret_cast(p_##_name)(args...); \ + } \ + }; \ + extern DynLoad__##__name __name + +#define DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) DYNAMIC_LOAD_MKLML_WRAP(__name) + +#define MKLML_ROUTINE_EACH(__macro) \ + __macro(cblas_sgemm); \ + __macro(cblas_saxpy); \ + __macro(cblas_scopy); \ + __macro(cblas_sgemv); \ + __macro(cblas_sgemm_batch); \ + __macro(cblas_dgemm); \ + __macro(cblas_daxpy); \ + __macro(cblas_dcopy); \ + __macro(cblas_dgemv); \ + __macro(cblas_dgemm_batch); \ + __macro(vsAdd); \ + __macro(vdAdd); \ + __macro(MKL_Set_Num_Threads) + +MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP); + +#undef DYNAMIC_LOAD_MKLML_WRAP + +} // namespace dynload +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 7b8c29e1e6..566485cd3c 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -44,8 +44,10 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/curand.h" +#ifndef __APPLE__ #include "paddle/fluid/platform/dynload/nccl.h" -#endif +#endif // __APPLE__ +#endif // PADDLE_WITH_CUDA namespace paddle { namespace platform { @@ -100,6 +102,15 @@ struct EnforceNotMet : public std::exception { const char* what() const noexcept { return err_str_.c_str(); } }; +struct EOFException : public std::exception { + std::string err_str_; + EOFException(const char* err_msg, const char* f, int l) { + err_str_ = string::Sprintf("%s at [%s:%d]", err_msg, f, l); + } + + const char* what() const noexcept { return err_str_.c_str(); } +}; + // Because most enforce conditions would evaluate to true, we can use // __builtin_expect to instruct the C++ compiler to generate code that // always forces branch prediction of true. @@ -111,7 +122,11 @@ template inline typename std::enable_if::type throw_on_error( bool stat, const Args&... args) { if (UNLIKELY(!(stat))) { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -121,8 +136,12 @@ template inline typename std::enable_if::type throw_on_error( cudaError_t e, const Args&... args) { if (UNLIKELY(e)) { +#ifndef REPLACE_ENFORCE_GLOG throw thrust::system_error(e, thrust::cuda_category(), string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -130,8 +149,12 @@ template inline typename std::enable_if::type throw_on_error( curandStatus_t stat, const Args&... args) { if (stat != CURAND_STATUS_SUCCESS) { +#ifndef REPLACE_ENFORCE_GLOG throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -141,8 +164,12 @@ inline typename std::enable_if::type throw_on_error( if (stat == CUDNN_STATUS_SUCCESS) { return; } else { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) + string::Sprintf(args...)); +#else + LOG(FATAL) << string::Sprintf(args...); +#endif } } @@ -171,20 +198,30 @@ inline typename std::enable_if::type throw_on_error( } else if (stat == CUBLAS_STATUS_LICENSE_ERROR) { err = "CUBLAS: license error, "; } +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(err + string::Sprintf(args...)); +#else + LOG(FATAL) << err << string::Sprintf(args...); +#endif } +#ifndef __APPLE__ template inline typename std::enable_if::type throw_on_error( ncclResult_t stat, const Args&... args) { if (stat == ncclSuccess) { return; } else { +#ifndef REPLACE_ENFORCE_GLOG throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + string::Sprintf(args...)); +#else + LOG(FATAL) << platform::dynload::ncclGetErrorString(stat) + << string::Sprintf(args...); +#endif } } - +#endif // __APPLE__ #endif // PADDLE_WITH_CUDA template @@ -200,6 +237,7 @@ inline void throw_on_error(T e) { __FILE__, __LINE__); \ } while (false) +#ifndef REPLACE_ENFORCE_GLOG #define PADDLE_ENFORCE(...) \ do { \ try { \ @@ -209,7 +247,15 @@ inline void throw_on_error(T e) { __FILE__, __LINE__); \ } \ } while (false) +#else +#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); +#endif +#define PADDLE_THROW_EOF() \ + do { \ + throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ + __LINE__); \ + } while (false) /* * Some enforce helpers here, usage: * int a = 1; diff --git a/paddle/fluid/platform/enforce_test.cc b/paddle/fluid/platform/enforce_test.cc index 57d751cc00..0e8684581a 100644 --- a/paddle/fluid/platform/enforce_test.cc +++ b/paddle/fluid/platform/enforce_test.cc @@ -210,3 +210,14 @@ TEST(ENFORCE_USER_DEFINED_CLASS, NE) { Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; ASSERT_THROW(PADDLE_ENFORCE_EQ(a, b), paddle::platform::EnforceNotMet); } + +TEST(EOF_EXCEPTION, THROW_EOF) { + bool caught_eof = false; + try { + PADDLE_THROW_EOF(); + } catch (paddle::platform::EOFException error) { + caught_eof = true; + EXPECT_TRUE(HasPrefix(StringPiece(error.what()), "There is no next data.")); + } + EXPECT_TRUE(caught_eof); +} diff --git a/paddle/fluid/platform/float16_test.cc b/paddle/fluid/platform/float16_test.cc index a589e32b61..ede294be1e 100644 --- a/paddle/fluid/platform/float16_test.cc +++ b/paddle/fluid/platform/float16_test.cc @@ -13,8 +13,8 @@ limitations under the License. */ #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/init.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/float16_test.cu b/paddle/fluid/platform/float16_test.cu index 577fc24ceb..1b9cf9b5d3 100644 --- a/paddle/fluid/platform/float16_test.cu +++ b/paddle/fluid/platform/float16_test.cu @@ -15,7 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #define ARITHMETIC_KERNEL(op_type, sign) \ __global__ void op_type(const half* in1, const half* in2, half* out) { \ diff --git a/paddle/fluid/framework/init.cc b/paddle/fluid/platform/init.cc similarity index 96% rename from paddle/fluid/framework/init.cc rename to paddle/fluid/platform/init.cc index a1094976f6..0b77652841 100644 --- a/paddle/fluid/framework/init.cc +++ b/paddle/fluid/platform/init.cc @@ -16,10 +16,10 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/piece.h" @@ -115,7 +115,7 @@ void InitDevices(bool init_p2p, const std::vector devices) { places.emplace_back(platform::CPUPlace()); platform::DeviceContextPool::Init(places); #ifndef PADDLE_WITH_MKLDNN - operators::math::SetNumThreads(1); + platform::SetNumThreads(1); #endif } diff --git a/paddle/fluid/framework/init.h b/paddle/fluid/platform/init.h similarity index 100% rename from paddle/fluid/framework/init.h rename to paddle/fluid/platform/init.h diff --git a/paddle/fluid/framework/init_test.cc b/paddle/fluid/platform/init_test.cc similarity index 96% rename from paddle/fluid/framework/init_test.cc rename to paddle/fluid/platform/init_test.cc index 928e2d14ab..eef1470a90 100644 --- a/paddle/fluid/framework/init_test.cc +++ b/paddle/fluid/platform/init_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" TEST(InitDevices, CPU) { using paddle::framework::InitDevices; diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index de711b7d23..33fec2c107 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include +#include #include #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/place.h" @@ -99,5 +100,155 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) { memory.get_primitive_desc().desc().data.format); } +inline mkldnn::memory::format GetMKLDNNFormat( + const mkldnn::sum::primitive_desc& memory) { + return static_cast( + memory.dst_primitive_desc().desc().data.format); +} + +class MKLDNNHandler { + public: + MKLDNNHandler(const MKLDNNDeviceContext& dev_ctx, mkldnn::engine engine, + const std::string& base_key) + : dev_ctx_(dev_ctx), + engine_(engine), + key_(base_key), + is_reusing_(false) {} + + std::shared_ptr AcquireSrcMemory( + const mkldnn::memory::desc& md, void* ptr) { + return this->AcquireMemory(md, ptr, "@user_src_mem_p"); + } + + std::shared_ptr AcquireWeightsMemory( + const mkldnn::memory::desc& md, void* ptr) { + return this->AcquireMemory(md, ptr, "@user_weights_mem_p"); + } + + std::shared_ptr AcquireDstMemory( + const mkldnn::memory::desc& md, void* ptr) { + return this->AcquireMemory(md, ptr, "@user_dst_mem_p"); + } + + std::shared_ptr AcquireDiffDstMemory( + const mkldnn::memory::desc& md, void* ptr) { + return this->AcquireMemory(md, ptr, "@user_diff_dst_mem_p"); + } + + std::shared_ptr AcquireDiffSrcMemory( + const mkldnn::memory::desc& md, void* ptr) { + return this->AcquireMemory(md, ptr, "@user_diff_src_mem_p"); + } + + std::shared_ptr AcquireMemoryFromPrimitive( + mkldnn::memory::primitive_desc mdp, void* ptr, + const std::string& suffix) { + auto local_key = key_ + suffix; + auto mem_p = + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), + "Fail to find mem primitive in device context"); + if (mem_p == nullptr) { + mem_p = std::make_shared(mdp, ptr); + dev_ctx_.SetBlob(local_key, mem_p); + } else { + mem_p->set_data_handle(ptr); + // Mark that reusing happenned. All primitives from operator instance + // should be reused or none of them. So we check consistency + is_reusing_ = true; + } + return mem_p; + } + + std::shared_ptr AcquireMemory(const mkldnn::memory::desc& md, + void* ptr, + const std::string& suffix) { + /*Generate key*/ + auto local_key = key_ + suffix; + auto mem_p = + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + PADDLE_ENFORCE((mem_p != nullptr) || (is_reusing_ == false), + "Fail to find mem primitive in device context"); + if (mem_p == nullptr) { + mem_p = std::make_shared( + mkldnn::memory::primitive_desc{md, engine_}, ptr); + dev_ctx_.SetBlob(local_key, mem_p); + } else { + mem_p->set_data_handle(ptr); + // Mark that reusing happenned. All primitives from operator instance + // should be reused or none of them. So we check consistency + is_reusing_ = true; + } + return mem_p; + } + + std::shared_ptr AcquireMemory( + mkldnn::memory::primitive_desc& mpd, // NOLINT + mkldnn::memory::primitive_desc& user_mpd, // NOLINT + const std::shared_ptr user_memory_p, + const std::string& suffix, + std::vector& pipeline) { // NOLINT + // create reorder primitive if the input format is not the preferred one + auto local_key = key_ + suffix; + auto key_reorder_p = key_ + suffix + "reorder_p"; + + auto target_memory_p = + std::static_pointer_cast(dev_ctx_.GetBlob(local_key)); + PADDLE_ENFORCE((target_memory_p != nullptr) || (is_reusing_ == false), + "Fail to find mem primitive in device context"); + if (target_memory_p == nullptr) { + target_memory_p = user_memory_p; + std::shared_ptr reorder_p; + if (mpd != user_mpd) { + target_memory_p = std::make_shared(mpd); + + auto reorder_p = + std::make_shared(*user_memory_p, *target_memory_p); + dev_ctx_.SetBlob(key_reorder_p, reorder_p); + pipeline.push_back(*reorder_p); + } + dev_ctx_.SetBlob(local_key, target_memory_p); + } else { + // Make reorder if needed + auto reorder_p = std::static_pointer_cast( + dev_ctx_.GetBlob(key_reorder_p)); + if (reorder_p != nullptr) { + pipeline.push_back(*reorder_p); + } + is_reusing_ = true; + } + return target_memory_p; + } + + static std::string GetHash(mkldnn::memory::dims& operand_dims, // NOLINT + const std::string& suffix) { + auto dims2str = [](const mkldnn::memory::dims& operand_dims) { + std::string dstr = ""; + for (size_t i = 0; i < operand_dims.size(); ++i) { + dstr += std::to_string(operand_dims[i]) + "-"; + } + return dstr; + }; + + return dims2str(operand_dims) + suffix; + } + + protected: + const MKLDNNDeviceContext& dev_ctx_; + mkldnn::engine engine_; + std::string key_; + bool is_reusing_; +}; + +inline mkldnn::memory::format MKLDNNFormatForSize( + size_t dims_size, mkldnn::memory::format data_format) { + if (dims_size == 1) { + return mkldnn::memory::format::x; + } else if (dims_size == 2) { + return mkldnn::memory::format::nc; + } + return data_format; +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/place.h b/paddle/fluid/platform/place.h index ad54a87899..e3ee504f3d 100644 --- a/paddle/fluid/platform/place.h +++ b/paddle/fluid/platform/place.h @@ -30,6 +30,7 @@ struct CPUPlace { // needed for variant equality comparison inline bool operator==(const CPUPlace &) const { return true; } inline bool operator!=(const CPUPlace &) const { return false; } + inline bool operator<(const CPUPlace &) const { return false; } }; struct CUDAPlace { @@ -42,6 +43,7 @@ struct CUDAPlace { return device == o.device; } inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); } + inline bool operator<(const CUDAPlace &o) const { return device < o.device; } int device; }; @@ -52,6 +54,7 @@ struct CUDAPinnedPlace { // needed for variant equality comparison inline bool operator==(const CUDAPinnedPlace &) const { return true; } inline bool operator!=(const CUDAPinnedPlace &) const { return false; } + inline bool operator<(const CUDAPinnedPlace &) const { return false; } }; struct IsCUDAPlace : public boost::static_visitor { @@ -89,18 +92,6 @@ bool is_cuda_pinned_place(const Place &); bool places_are_same_class(const Place &, const Place &); bool is_same_place(const Place &, const Place &); -struct PlaceHash { - std::size_t operator()(const Place &p) const { - constexpr size_t num_dev_bits = 4; - std::hash ihash; - size_t dev_id = 0; - if (is_gpu_place(p)) { - dev_id = boost::get(p).device; - } - return ihash(dev_id << num_dev_bits | p.which()); - } -}; - std::ostream &operator<<(std::ostream &, const Place &); template diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 4fef351c21..89ca4f7812 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -2,13 +2,13 @@ if(WITH_PYTHON) if(WITH_AMD_GPU) hip_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) else() cc_library(paddle_pybind SHARED SRCS pybind.cc exception.cc protobuf.cc const_value.cc recordio.cc - DEPS pybind python proto_desc memory executor prune init profiler feed_fetch_method + DEPS pybind python proto_desc memory executor prune profiler feed_fetch_method parallel_executor ${GLOB_OP_LIB}) if(NOT APPLE AND NOT ANDROID) diff --git a/paddle/fluid/pybind/exception.cc b/paddle/fluid/pybind/exception.cc index 08a2f185e1..831f30e35f 100644 --- a/paddle/fluid/pybind/exception.cc +++ b/paddle/fluid/pybind/exception.cc @@ -18,10 +18,13 @@ namespace paddle { namespace pybind { void BindException(pybind11::module* m) { + static pybind11::exception eof(*m, "EOFException"); static pybind11::exception exc(*m, "EnforceNotMet"); pybind11::register_exception_translator([](std::exception_ptr p) { try { if (p) std::rethrow_exception(p); + } catch (const platform::EOFException& e) { + eof(e.what()); } catch (const platform::EnforceNotMet& e) { exc(e.what()); } diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index bcf6d4dd30..fcd3356d44 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -268,7 +268,8 @@ void BindOpDesc(pybind11::module *m) { .value("STRINGS", pd::proto::AttrType::STRINGS) .value("BOOL", pd::proto::AttrType::BOOLEAN) .value("BOOLS", pd::proto::AttrType::BOOLEANS) - .value("BLOCK", pd::proto::AttrType::BLOCK); + .value("BLOCK", pd::proto::AttrType::BLOCK) + .value("BLOCKS", pd::proto::AttrType::BLOCKS); pybind11::class_ op_desc(*m, "OpDesc", ""); op_desc @@ -293,6 +294,7 @@ void BindOpDesc(pybind11::module *m) { .def("set_attr", &pd::OpDesc::SetAttr) .def("attr", &pd::OpDesc::GetAttr) .def("set_block_attr", &pd::OpDesc::SetBlockAttr) + .def("set_blocks_attr", &pd::OpDesc::SetBlocksAttr) .def("set_serialized_attr", [](pd::OpDesc &self, const std::string &name, const pybind11::bytes &seriralized) { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index bd5c613f8c..0c523b6f17 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -24,7 +24,6 @@ limitations under the License. */ #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/framework.pb.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor_array.h" @@ -34,7 +33,9 @@ limitations under the License. */ #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/const_value.h" @@ -144,28 +145,75 @@ PYBIND11_PLUGIN(core) { py::class_(m, "LoDTensor") .def_buffer( [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) - .def( - "__init__", - [](LoDTensor &instance, const std::vector> &lod) { - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - new (&instance) LoDTensor(new_lod); - }) + .def("__init__", + [](LoDTensor &instance, const std::vector> + &recursive_sequence_lengths) { + LoD new_lod; + new_lod.reserve(recursive_sequence_lengths.size()); + std::copy(recursive_sequence_lengths.begin(), + recursive_sequence_lengths.end(), + std::back_inserter(new_lod)); + LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); + PADDLE_ENFORCE( + CheckLoD(new_offset_lod, -1), + "the provided recursive_sequence_lengths info is invalid"); + new (&instance) LoDTensor(new_offset_lod); + }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) + // We implement offset based LOD in C++ while we use length based with + // Python API. So we changed set_lod to set_recursive_sequence_lengths to + // avoid misuse. + // The discussion is here: + // https://github.com/PaddlePaddle/Paddle/issues/10855 .def("set_lod", [](LoDTensor &self, const std::vector> &lod) { + // the input lod is offset-based level-of-detail info LoD new_lod; new_lod.reserve(lod.size()); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()), + "the provided lod info is invalid"); self.set_lod(new_lod); }) - .def("lod", [](LoDTensor &self) -> std::vector> { - auto lod = self.lod(); - std::vector> new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - return new_lod; + .def("set_recursive_sequence_lengths", + [](LoDTensor &self, const std::vector> + &recursive_sequence_lengths) { + // the input recursive_sequence_lengths is length-based + // level-of-detail info + LoD new_lod; + new_lod.reserve(recursive_sequence_lengths.size()); + std::copy(recursive_sequence_lengths.begin(), + recursive_sequence_lengths.end(), + std::back_inserter(new_lod)); + LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); + PADDLE_ENFORCE( + CheckLoD(new_offset_lod, vectorize(self.dims()).front()), + "the provided recursive_sequence_lengths info is invalid"); + self.set_lod(new_offset_lod); + }) + .def("lod", + [](LoDTensor &self) -> std::vector> { + // output the offset-based lod info + LoD lod = self.lod(); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; + }) + // Set above comments of set_lod. + .def("recursive_sequence_lengths", + [](LoDTensor &self) -> std::vector> { + // output the length-based lod info + LoD lod = ConvertToLengthBasedLoD(self.lod()); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; + }) + .def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool { + // Check that the lod info is valid and match the outermost + // dimension of the LoDTensor data + return CheckLoD(self.lod(), vectorize(self.dims()).front()); }); py::class_(m, "SelectedRows") @@ -248,7 +296,38 @@ All parameter, weight, gradient are variables in Paddle. py::return_value_policy::reference); py::class_(m, "Reader", "") - .def("reset", &framework::ReaderHolder::ReInit); + .def("reset", &framework::ReaderHolder::ResetAll); + + using LoDTensorBlockingQueue = + ::paddle::operators::reader::LoDTensorBlockingQueue; + using LoDTensorBlockingQueueHolder = + ::paddle::operators::reader::LoDTensorBlockingQueueHolder; + py::class_(m, "LoDTensorBlockingQueue", "") + .def("push", + [](LoDTensorBlockingQueue &self, + const std::vector &lod_tensor_vec) { + pybind11::gil_scoped_release release; + return self.Push(lod_tensor_vec); + }) + .def("size", &LoDTensorBlockingQueue::Size) + .def("capacity", &LoDTensorBlockingQueue::Cap) + .def("close", &LoDTensorBlockingQueue::Close) + .def("is_closed", &LoDTensorBlockingQueue::IsClosed); + + m.def("init_lod_tensor_blocking_queue", + [](Variable &var, size_t capacity, + const std::vector> &shapes) + -> LoDTensorBlockingQueue * { + std::vector dims(shapes.size()); + std::transform(shapes.begin(), shapes.end(), dims.begin(), + [](const std::vector &shape) { + return make_ddim(shape); + }); + auto *holder = var.GetMutable(); + holder->InitOnce(capacity, dims); + return holder->GetQueue().get(); + }, + py::return_value_policy::reference); py::class_(m, "Scope", "") .def("var", @@ -414,11 +493,14 @@ All parameter, weight, gradient are variables in Paddle. py::class_(m, "Executor") .def(py::init()) #ifdef PADDLE_WITH_DISTRIBUTE - .def("complete", &Executor::Complete) + .def("begin_pass", &Executor::BeginPass) + .def("end_pass", &Executor::EndPass) #endif - .def("run", - (void (Executor::*)(const ProgramDesc &, Scope *, int, bool, bool)) & - Executor::Run); + .def("run", [](Executor &self, const ProgramDesc &prog, Scope *scope, + int block_id, bool create_local_scope, bool create_vars) { + pybind11::gil_scoped_release release; + self.Run(prog, scope, block_id, create_local_scope, create_vars); + }); m.def("init_gflags", framework::InitGflags); m.def("init_glog", framework::InitGLOG); @@ -562,7 +644,11 @@ All parameter, weight, gradient are variables in Paddle. [](const BuildStrategy &self) { return self.debug_graphviz_path_; }, [](BuildStrategy &self, const std::string &path) { self.debug_graphviz_path_ = path; - }); + }) + .def_property( + "enable_data_balance", + [](const BuildStrategy &self) { return self.enable_data_balance_; }, + [](BuildStrategy &self, bool b) { self.enable_data_balance_ = b; }); pe.def(py::init &, const std::unordered_set &, @@ -584,7 +670,12 @@ All parameter, weight, gradient are variables in Paddle. &ParallelExecutor::FeedTensorsIntoLocalScopes) .def("feed_and_split_tensor_into_local_scopes", &ParallelExecutor::FeedAndSplitTensorIntoLocalScopes) - .def("run", &ParallelExecutor::Run); + .def("run", [](ParallelExecutor &self, + const std::vector &fetch_tensors, + const std::string &fetched_var_name) { + pybind11::gil_scoped_release release; + self.Run(fetch_tensors, fetched_var_name); + }); BindRecordIOWriter(&m); return m.ptr(); diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 93b09ed692..3e2ea1ef88 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -97,7 +97,7 @@ struct CastToPyBufferImpl { inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) { auto buffer_info = details::CastToPyBufferImpl()(tensor); + uint8_t, platform::float16>()(tensor); return buffer_info; } @@ -146,7 +146,7 @@ void PyCPUTensorSetFromArray( template <> // This following specialization maps uint16_t in the parameter type to // platform::float16. -void PyCPUTensorSetFromArray( +inline void PyCPUTensorSetFromArray( framework::Tensor *self, pybind11::array_t @@ -185,7 +185,7 @@ void PyCUDATensorSetFromArray( template <> // This following specialization maps uint16_t in the parameter type to // platform::float16. -void PyCUDATensorSetFromArray( +inline void PyCUDATensorSetFromArray( framework::Tensor *self, pybind11::array_t @@ -224,7 +224,7 @@ void PyCUDAPinnedTensorSetFromArray( template <> // This following specialization maps uint16_t in the parameter type to // platform::float16. -void PyCUDAPinnedTensorSetFromArray( +inline void PyCUDAPinnedTensorSetFromArray( framework::Tensor *self, pybind11::array_t diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 062095a1c3..47de233773 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -83,6 +83,13 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) { tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...)); } +template +std::string Sprintf(const Args&... args) { + std::ostringstream oss; + Fprintf(oss, ""); + return oss.str(); +} + template std::string Sprintf(const char* fmt, const Args&... args) { std::ostringstream oss; diff --git a/paddle/fluid/string/printf_test.cc b/paddle/fluid/string/printf_test.cc index 678029f935..544b12ef3a 100644 --- a/paddle/fluid/string/printf_test.cc +++ b/paddle/fluid/string/printf_test.cc @@ -27,4 +27,5 @@ TEST(StringPrintf, StringPrintf) { EXPECT_EQ(std::string("Wednesday, July 27, 14:44"), paddle::string::Sprintf("%s, %s %d, %.2d:%.2d", weekday, month, day, hour, min)); + EXPECT_EQ(std::string(""), paddle::string::Sprintf()); } diff --git a/paddle/fluid/train/demo/demo_trainer.cc b/paddle/fluid/train/demo/demo_trainer.cc index 813d838686..4425f062ef 100644 --- a/paddle/fluid/train/demo/demo_trainer.cc +++ b/paddle/fluid/train/demo/demo_trainer.cc @@ -15,11 +15,11 @@ #include #include "paddle/fluid/framework/executor.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/place.h" namespace paddle { diff --git a/paddle/gserver/tests/Sequence/train.list b/paddle/gserver/tests/Sequence/train.list deleted file mode 100644 index be27acb3a5..0000000000 --- a/paddle/gserver/tests/Sequence/train.list +++ /dev/null @@ -1 +0,0 @@ -gserver/tests/Sequence/tour_train_wdseg diff --git a/paddle/gserver/tests/Sequence/train.list.nest b/paddle/gserver/tests/Sequence/train.list.nest deleted file mode 100644 index 7683ebc68e..0000000000 --- a/paddle/gserver/tests/Sequence/train.list.nest +++ /dev/null @@ -1 +0,0 @@ -gserver/tests/Sequence/tour_train_wdseg.nest diff --git a/paddle/api/Arguments.cpp b/paddle/legacy/api/Arguments.cpp similarity index 99% rename from paddle/api/Arguments.cpp rename to paddle/legacy/api/Arguments.cpp index 62d6a574d5..7bb5a6f75b 100644 --- a/paddle/api/Arguments.cpp +++ b/paddle/legacy/api/Arguments.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "PaddleAPI.h" #include "PaddleAPIPrivate.h" -#include "paddle/parameter/Argument.h" +#include "paddle/legacy/parameter/Argument.h" size_t Arguments::getSlotNum() const { return m->outputs.size(); } diff --git a/paddle/api/CMakeLists.txt b/paddle/legacy/api/CMakeLists.txt similarity index 100% rename from paddle/api/CMakeLists.txt rename to paddle/legacy/api/CMakeLists.txt diff --git a/paddle/api/ConfigParser.cpp b/paddle/legacy/api/ConfigParser.cpp similarity index 98% rename from paddle/api/ConfigParser.cpp rename to paddle/legacy/api/ConfigParser.cpp index d362a1e7cf..016d6da4e2 100644 --- a/paddle/api/ConfigParser.cpp +++ b/paddle/legacy/api/ConfigParser.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "PaddleAPI.h" #include "PaddleAPIPrivate.h" -#include "paddle/trainer/Trainer.h" +#include "paddle/legacy/trainer/Trainer.h" struct ParameterConfigPrivate { paddle::ParameterPtr parameter; diff --git a/paddle/api/Evaluator.cpp b/paddle/legacy/api/Evaluator.cpp similarity index 100% rename from paddle/api/Evaluator.cpp rename to paddle/legacy/api/Evaluator.cpp diff --git a/paddle/api/GradientMachine.cpp b/paddle/legacy/api/GradientMachine.cpp similarity index 98% rename from paddle/api/GradientMachine.cpp rename to paddle/legacy/api/GradientMachine.cpp index 0d9ad30de9..5ad2fe11a4 100644 --- a/paddle/api/GradientMachine.cpp +++ b/paddle/legacy/api/GradientMachine.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "PaddleAPIPrivate.h" #include "Internal.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" std::vector GradientMachine::defaultParamTypes = { PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM}; diff --git a/paddle/api/Internal.h b/paddle/legacy/api/Internal.h similarity index 100% rename from paddle/api/Internal.h rename to paddle/legacy/api/Internal.h diff --git a/paddle/api/Matrix.cpp b/paddle/legacy/api/Matrix.cpp similarity index 98% rename from paddle/api/Matrix.cpp rename to paddle/legacy/api/Matrix.cpp index 8282b4629d..8862d0ea92 100644 --- a/paddle/api/Matrix.cpp +++ b/paddle/legacy/api/Matrix.cpp @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" #include #include #include "PaddleAPI.h" -#include "paddle/math/CpuSparseMatrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/CpuSparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" struct MatrixPrivate { std::shared_ptr mat; diff --git a/paddle/api/Paddle.i b/paddle/legacy/api/Paddle.i similarity index 98% rename from paddle/api/Paddle.i rename to paddle/legacy/api/Paddle.i index 3237e73745..7a1456a5c0 100644 --- a/paddle/api/Paddle.i +++ b/paddle/legacy/api/Paddle.i @@ -2,7 +2,7 @@ %include "std_string.i" %{ #define SWIG_FILE_WITH_INIT -#include "api/PaddleAPI.h" +#include "legacy/api/PaddleAPI.h" %} %include "exception.i" @@ -198,5 +198,5 @@ namespace std { %ignore ParameterConfigPrivate; %ignore OptimizationConfigPrivate; %ignore ParameterTraverseCallbackPrivate; -%include "utils/GlobalConstants.h" -%include "api/PaddleAPI.h" +%include "legacy/utils/GlobalConstants.h" +%include "legacy/api/PaddleAPI.h" diff --git a/paddle/api/PaddleAPI.h b/paddle/legacy/api/PaddleAPI.h similarity index 99% rename from paddle/api/PaddleAPI.h rename to paddle/legacy/api/PaddleAPI.h index 7866122006..475984a3d5 100644 --- a/paddle/api/PaddleAPI.h +++ b/paddle/legacy/api/PaddleAPI.h @@ -19,9 +19,9 @@ limitations under the License. */ #include #include #include -#include "paddle/gserver/gradientmachines/GradientMachine.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/GlobalConstants.h" /// Import PaddlePaddle's enumeration into global namespace. using namespace paddle::enumeration_wrapper; // NOLINT diff --git a/paddle/api/PaddleAPIPrivate.h b/paddle/legacy/api/PaddleAPIPrivate.h similarity index 91% rename from paddle/api/PaddleAPIPrivate.h rename to paddle/legacy/api/PaddleAPIPrivate.h index e141fcd761..3ee192c31d 100644 --- a/paddle/api/PaddleAPIPrivate.h +++ b/paddle/legacy/api/PaddleAPIPrivate.h @@ -14,10 +14,10 @@ limitations under the License. */ #pragma once #include #include "PaddleAPI.h" -#include "paddle/gserver/evaluators/Evaluator.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" -#include "paddle/parameter/ParameterUpdaterBase.h" -#include "paddle/trainer/TrainerConfigHelper.h" +#include "paddle/legacy/gserver/evaluators/Evaluator.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/parameter/ParameterUpdaterBase.h" +#include "paddle/legacy/trainer/TrainerConfigHelper.h" struct GradientMachinePrivate { std::shared_ptr machine; diff --git a/paddle/api/Parameter.cpp b/paddle/legacy/api/Parameter.cpp similarity index 97% rename from paddle/api/Parameter.cpp rename to paddle/legacy/api/Parameter.cpp index 589d22e74e..f05740eb75 100644 --- a/paddle/api/Parameter.cpp +++ b/paddle/legacy/api/Parameter.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/parameter/Parameter.h" +#include "paddle/legacy/parameter/Parameter.h" #include "PaddleAPI.h" #include "PaddleAPIPrivate.h" diff --git a/paddle/api/ParameterOptimizer.cpp b/paddle/legacy/api/ParameterOptimizer.cpp similarity index 98% rename from paddle/api/ParameterOptimizer.cpp rename to paddle/legacy/api/ParameterOptimizer.cpp index d4620be3e6..477d9dae44 100644 --- a/paddle/api/ParameterOptimizer.cpp +++ b/paddle/legacy/api/ParameterOptimizer.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/parameter/ParameterOptimizer.h" +#include "paddle/legacy/parameter/ParameterOptimizer.h" #include #include "Internal.h" #include "PaddleAPI.h" diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/legacy/api/ParameterUpdater.cpp similarity index 94% rename from paddle/api/ParameterUpdater.cpp rename to paddle/legacy/api/ParameterUpdater.cpp index 63c000c959..44af3f4635 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/legacy/api/ParameterUpdater.cpp @@ -16,10 +16,10 @@ limitations under the License. */ #include "PaddleAPIPrivate.h" #ifndef PADDLE_WITHOUT_GOLANG -#include "paddle/trainer/NewRemoteParameterUpdater.h" +#include "paddle/legacy/trainer/NewRemoteParameterUpdater.h" #endif -#include "paddle/trainer/RemoteParameterUpdater.h" -#include "paddle/trainer/ThreadParameterUpdater.h" +#include "paddle/legacy/trainer/RemoteParameterUpdater.h" +#include "paddle/legacy/trainer/ThreadParameterUpdater.h" ParameterUpdater::ParameterUpdater() : m(new ParameterUpdaterPrivate()) {} diff --git a/paddle/api/SequenceGenerator.cpp b/paddle/legacy/api/SequenceGenerator.cpp similarity index 98% rename from paddle/api/SequenceGenerator.cpp rename to paddle/legacy/api/SequenceGenerator.cpp index 1446c30842..2a73228f6d 100644 --- a/paddle/api/SequenceGenerator.cpp +++ b/paddle/legacy/api/SequenceGenerator.cpp @@ -17,9 +17,9 @@ limitations under the License. */ #include #include #include "PaddleAPI.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" -#include "paddle/parameter/Argument.h" -#include "paddle/utils/Flags.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/parameter/Argument.h" +#include "paddle/legacy/utils/Flags.h" // used to represent partial sequence struct Path { diff --git a/paddle/api/Trainer.cpp b/paddle/legacy/api/Trainer.cpp similarity index 95% rename from paddle/api/Trainer.cpp rename to paddle/legacy/api/Trainer.cpp index 795460b650..e7c607201b 100644 --- a/paddle/api/Trainer.cpp +++ b/paddle/legacy/api/Trainer.cpp @@ -19,11 +19,11 @@ limitations under the License. */ #include #include -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/trainer/ParamUtil.h" -#include "paddle/trainer/Trainer.h" -#include "paddle/trainer/TrainerInternal.h" -#include "paddle/utils/Flags.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/trainer/ParamUtil.h" +#include "paddle/legacy/trainer/Trainer.h" +#include "paddle/legacy/trainer/TrainerInternal.h" +#include "paddle/legacy/utils/Flags.h" using paddle::real; diff --git a/paddle/api/Util.cpp b/paddle/legacy/api/Util.cpp similarity index 89% rename from paddle/api/Util.cpp rename to paddle/legacy/api/Util.cpp index 618e87e964..b458c4d90e 100644 --- a/paddle/api/Util.cpp +++ b/paddle/legacy/api/Util.cpp @@ -14,11 +14,11 @@ limitations under the License. */ #include "PaddleAPI.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Util.h" #include #include diff --git a/paddle/api/Vector.cpp b/paddle/legacy/api/Vector.cpp similarity index 99% rename from paddle/api/Vector.cpp rename to paddle/legacy/api/Vector.cpp index e2a7b974ca..73b6d3a15d 100644 --- a/paddle/api/Vector.cpp +++ b/paddle/legacy/api/Vector.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "PaddleAPI.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" #include diff --git a/paddle/api/__init__.py b/paddle/legacy/api/__init__.py similarity index 100% rename from paddle/api/__init__.py rename to paddle/legacy/api/__init__.py diff --git a/paddle/api/numpy.i b/paddle/legacy/api/numpy.i similarity index 100% rename from paddle/api/numpy.i rename to paddle/legacy/api/numpy.i diff --git a/paddle/api/test/.gitignore b/paddle/legacy/api/test/.gitignore similarity index 100% rename from paddle/api/test/.gitignore rename to paddle/legacy/api/test/.gitignore diff --git a/paddle/api/test/CMakeLists.txt b/paddle/legacy/api/test/CMakeLists.txt similarity index 100% rename from paddle/api/test/CMakeLists.txt rename to paddle/legacy/api/test/CMakeLists.txt diff --git a/paddle/api/test/testArguments.py b/paddle/legacy/api/test/testArguments.py similarity index 100% rename from paddle/api/test/testArguments.py rename to paddle/legacy/api/test/testArguments.py diff --git a/paddle/api/test/testGradientMachine.py b/paddle/legacy/api/test/testGradientMachine.py similarity index 100% rename from paddle/api/test/testGradientMachine.py rename to paddle/legacy/api/test/testGradientMachine.py diff --git a/paddle/api/test/testMatrix.py b/paddle/legacy/api/test/testMatrix.py similarity index 100% rename from paddle/api/test/testMatrix.py rename to paddle/legacy/api/test/testMatrix.py diff --git a/paddle/api/test/testTrain.py b/paddle/legacy/api/test/testTrain.py similarity index 100% rename from paddle/api/test/testTrain.py rename to paddle/legacy/api/test/testTrain.py diff --git a/paddle/api/test/testTrainConfig.py b/paddle/legacy/api/test/testTrainConfig.py similarity index 100% rename from paddle/api/test/testTrainConfig.py rename to paddle/legacy/api/test/testTrainConfig.py diff --git a/paddle/api/test/testTrainer.py b/paddle/legacy/api/test/testTrainer.py similarity index 100% rename from paddle/api/test/testTrainer.py rename to paddle/legacy/api/test/testTrainer.py diff --git a/paddle/api/test/testVector.py b/paddle/legacy/api/test/testVector.py similarity index 100% rename from paddle/api/test/testVector.py rename to paddle/legacy/api/test/testVector.py diff --git a/paddle/api/test/util.py b/paddle/legacy/api/test/util.py similarity index 100% rename from paddle/api/test/util.py rename to paddle/legacy/api/test/util.py diff --git a/paddle/capi/Arguments.cpp b/paddle/legacy/capi/Arguments.cpp similarity index 100% rename from paddle/capi/Arguments.cpp rename to paddle/legacy/capi/Arguments.cpp diff --git a/paddle/capi/CMakeLists.txt b/paddle/legacy/capi/CMakeLists.txt similarity index 100% rename from paddle/capi/CMakeLists.txt rename to paddle/legacy/capi/CMakeLists.txt diff --git a/paddle/capi/Main.cpp b/paddle/legacy/capi/Main.cpp similarity index 90% rename from paddle/capi/Main.cpp rename to paddle/legacy/capi/Main.cpp index 0a289dede6..17d8f00a88 100644 --- a/paddle/capi/Main.cpp +++ b/paddle/legacy/capi/Main.cpp @@ -18,9 +18,9 @@ limitations under the License. */ #include #include "capi_private.h" #include "main.h" -#include "paddle/trainer/TrainerConfigHelper.h" -#include "paddle/utils/Excepts.h" -#include "paddle/utils/PythonUtil.h" +#include "paddle/legacy/trainer/TrainerConfigHelper.h" +#include "paddle/legacy/utils/Excepts.h" +#include "paddle/legacy/utils/PythonUtil.h" static void initPaddle(int argc, char** argv) { paddle::initMain(argc, argv); diff --git a/paddle/capi/Matrix.cpp b/paddle/legacy/capi/Matrix.cpp similarity index 100% rename from paddle/capi/Matrix.cpp rename to paddle/legacy/capi/Matrix.cpp diff --git a/paddle/capi/Vector.cpp b/paddle/legacy/capi/Vector.cpp similarity index 100% rename from paddle/capi/Vector.cpp rename to paddle/legacy/capi/Vector.cpp diff --git a/paddle/capi/arguments.h b/paddle/legacy/capi/arguments.h similarity index 100% rename from paddle/capi/arguments.h rename to paddle/legacy/capi/arguments.h diff --git a/paddle/capi/capi.h b/paddle/legacy/capi/capi.h similarity index 100% rename from paddle/capi/capi.h rename to paddle/legacy/capi/capi.h diff --git a/paddle/capi/capi_private.h b/paddle/legacy/capi/capi_private.h similarity index 90% rename from paddle/capi/capi_private.h rename to paddle/legacy/capi/capi_private.h index 3332f42a4a..e5f8c8c5c8 100644 --- a/paddle/capi/capi_private.h +++ b/paddle/legacy/capi/capi_private.h @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "capi.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/parameter/Argument.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Argument.h" #pragma once namespace paddle { diff --git a/paddle/capi/config.h.in b/paddle/legacy/capi/config.h.in similarity index 100% rename from paddle/capi/config.h.in rename to paddle/legacy/capi/config.h.in diff --git a/paddle/capi/error.cpp b/paddle/legacy/capi/error.cpp similarity index 100% rename from paddle/capi/error.cpp rename to paddle/legacy/capi/error.cpp diff --git a/paddle/capi/error.h b/paddle/legacy/capi/error.h similarity index 100% rename from paddle/capi/error.h rename to paddle/legacy/capi/error.h diff --git a/paddle/capi/examples/.gitignore b/paddle/legacy/capi/examples/.gitignore similarity index 100% rename from paddle/capi/examples/.gitignore rename to paddle/legacy/capi/examples/.gitignore diff --git a/paddle/capi/examples/README.md b/paddle/legacy/capi/examples/README.md similarity index 100% rename from paddle/capi/examples/README.md rename to paddle/legacy/capi/examples/README.md diff --git a/paddle/capi/examples/model_inference/README.md b/paddle/legacy/capi/examples/model_inference/README.md similarity index 100% rename from paddle/capi/examples/model_inference/README.md rename to paddle/legacy/capi/examples/model_inference/README.md diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/legacy/capi/examples/model_inference/common/common.h similarity index 100% rename from paddle/capi/examples/model_inference/common/common.h rename to paddle/legacy/capi/examples/model_inference/common/common.h diff --git a/paddle/capi/examples/model_inference/dense/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/dense/CMakeLists.txt similarity index 100% rename from paddle/capi/examples/model_inference/dense/CMakeLists.txt rename to paddle/legacy/capi/examples/model_inference/dense/CMakeLists.txt diff --git a/paddle/capi/examples/model_inference/dense/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/dense/convert_protobin.sh similarity index 100% rename from paddle/capi/examples/model_inference/dense/convert_protobin.sh rename to paddle/legacy/capi/examples/model_inference/dense/convert_protobin.sh diff --git a/paddle/capi/examples/model_inference/dense/main.c b/paddle/legacy/capi/examples/model_inference/dense/main.c similarity index 100% rename from paddle/capi/examples/model_inference/dense/main.c rename to paddle/legacy/capi/examples/model_inference/dense/main.c diff --git a/paddle/capi/examples/model_inference/dense/merge_v2_model.py b/paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py similarity index 100% rename from paddle/capi/examples/model_inference/dense/merge_v2_model.py rename to paddle/legacy/capi/examples/model_inference/dense/merge_v2_model.py diff --git a/paddle/capi/examples/model_inference/dense/mnist_v2.py b/paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py similarity index 100% rename from paddle/capi/examples/model_inference/dense/mnist_v2.py rename to paddle/legacy/capi/examples/model_inference/dense/mnist_v2.py diff --git a/paddle/capi/examples/model_inference/dense/trainer_config.py b/paddle/legacy/capi/examples/model_inference/dense/trainer_config.py similarity index 100% rename from paddle/capi/examples/model_inference/dense/trainer_config.py rename to paddle/legacy/capi/examples/model_inference/dense/trainer_config.py diff --git a/paddle/capi/examples/model_inference/multi_thread/.gitignore b/paddle/legacy/capi/examples/model_inference/multi_thread/.gitignore similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/.gitignore rename to paddle/legacy/capi/examples/model_inference/multi_thread/.gitignore diff --git a/paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/multi_thread/CMakeLists.txt similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/CMakeLists.txt rename to paddle/legacy/capi/examples/model_inference/multi_thread/CMakeLists.txt diff --git a/paddle/capi/examples/model_inference/multi_thread/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/multi_thread/convert_protobin.sh similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/convert_protobin.sh rename to paddle/legacy/capi/examples/model_inference/multi_thread/convert_protobin.sh diff --git a/paddle/capi/examples/model_inference/multi_thread/main.c b/paddle/legacy/capi/examples/model_inference/multi_thread/main.c similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/main.c rename to paddle/legacy/capi/examples/model_inference/multi_thread/main.c diff --git a/paddle/capi/examples/model_inference/multi_thread/main_gpu.c b/paddle/legacy/capi/examples/model_inference/multi_thread/main_gpu.c similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/main_gpu.c rename to paddle/legacy/capi/examples/model_inference/multi_thread/main_gpu.c diff --git a/paddle/capi/examples/model_inference/multi_thread/trainer_config.py b/paddle/legacy/capi/examples/model_inference/multi_thread/trainer_config.py similarity index 100% rename from paddle/capi/examples/model_inference/multi_thread/trainer_config.py rename to paddle/legacy/capi/examples/model_inference/multi_thread/trainer_config.py diff --git a/paddle/capi/examples/model_inference/sequence/.gitignore b/paddle/legacy/capi/examples/model_inference/sequence/.gitignore similarity index 100% rename from paddle/capi/examples/model_inference/sequence/.gitignore rename to paddle/legacy/capi/examples/model_inference/sequence/.gitignore diff --git a/paddle/capi/examples/model_inference/sequence/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/sequence/CMakeLists.txt similarity index 100% rename from paddle/capi/examples/model_inference/sequence/CMakeLists.txt rename to paddle/legacy/capi/examples/model_inference/sequence/CMakeLists.txt diff --git a/paddle/capi/examples/model_inference/sequence/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/sequence/convert_protobin.sh similarity index 100% rename from paddle/capi/examples/model_inference/sequence/convert_protobin.sh rename to paddle/legacy/capi/examples/model_inference/sequence/convert_protobin.sh diff --git a/paddle/capi/examples/model_inference/sequence/main.c b/paddle/legacy/capi/examples/model_inference/sequence/main.c similarity index 100% rename from paddle/capi/examples/model_inference/sequence/main.c rename to paddle/legacy/capi/examples/model_inference/sequence/main.c diff --git a/paddle/capi/examples/model_inference/sequence/trainer_config.py b/paddle/legacy/capi/examples/model_inference/sequence/trainer_config.py similarity index 100% rename from paddle/capi/examples/model_inference/sequence/trainer_config.py rename to paddle/legacy/capi/examples/model_inference/sequence/trainer_config.py diff --git a/paddle/capi/examples/model_inference/sparse_binary/.gitignore b/paddle/legacy/capi/examples/model_inference/sparse_binary/.gitignore similarity index 100% rename from paddle/capi/examples/model_inference/sparse_binary/.gitignore rename to paddle/legacy/capi/examples/model_inference/sparse_binary/.gitignore diff --git a/paddle/capi/examples/model_inference/sparse_binary/CMakeLists.txt b/paddle/legacy/capi/examples/model_inference/sparse_binary/CMakeLists.txt similarity index 100% rename from paddle/capi/examples/model_inference/sparse_binary/CMakeLists.txt rename to paddle/legacy/capi/examples/model_inference/sparse_binary/CMakeLists.txt diff --git a/paddle/capi/examples/model_inference/sparse_binary/convert_protobin.sh b/paddle/legacy/capi/examples/model_inference/sparse_binary/convert_protobin.sh similarity index 100% rename from paddle/capi/examples/model_inference/sparse_binary/convert_protobin.sh rename to paddle/legacy/capi/examples/model_inference/sparse_binary/convert_protobin.sh diff --git a/paddle/capi/examples/model_inference/sparse_binary/main.c b/paddle/legacy/capi/examples/model_inference/sparse_binary/main.c similarity index 100% rename from paddle/capi/examples/model_inference/sparse_binary/main.c rename to paddle/legacy/capi/examples/model_inference/sparse_binary/main.c diff --git a/paddle/capi/examples/model_inference/sparse_binary/trainer_config.py b/paddle/legacy/capi/examples/model_inference/sparse_binary/trainer_config.py similarity index 100% rename from paddle/capi/examples/model_inference/sparse_binary/trainer_config.py rename to paddle/legacy/capi/examples/model_inference/sparse_binary/trainer_config.py diff --git a/paddle/capi/gradient_machine.cpp b/paddle/legacy/capi/gradient_machine.cpp similarity index 98% rename from paddle/capi/gradient_machine.cpp rename to paddle/legacy/capi/gradient_machine.cpp index 8c3f504e5a..0c5ddd856b 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/legacy/capi/gradient_machine.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "gradient_machine.h" #include "capi_private.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" #define cast(v) paddle::capi::cast(v) diff --git a/paddle/capi/gradient_machine.h b/paddle/legacy/capi/gradient_machine.h similarity index 100% rename from paddle/capi/gradient_machine.h rename to paddle/legacy/capi/gradient_machine.h diff --git a/paddle/capi/main.h b/paddle/legacy/capi/main.h similarity index 100% rename from paddle/capi/main.h rename to paddle/legacy/capi/main.h diff --git a/paddle/capi/matrix.h b/paddle/legacy/capi/matrix.h similarity index 100% rename from paddle/capi/matrix.h rename to paddle/legacy/capi/matrix.h diff --git a/paddle/capi/paddle_capi.map b/paddle/legacy/capi/paddle_capi.map similarity index 100% rename from paddle/capi/paddle_capi.map rename to paddle/legacy/capi/paddle_capi.map diff --git a/paddle/capi/tests/.gitignore b/paddle/legacy/capi/tests/.gitignore similarity index 100% rename from paddle/capi/tests/.gitignore rename to paddle/legacy/capi/tests/.gitignore diff --git a/paddle/capi/tests/CMakeLists.txt b/paddle/legacy/capi/tests/CMakeLists.txt similarity index 100% rename from paddle/capi/tests/CMakeLists.txt rename to paddle/legacy/capi/tests/CMakeLists.txt diff --git a/paddle/capi/tests/test_Arguments.cpp b/paddle/legacy/capi/tests/test_Arguments.cpp similarity index 99% rename from paddle/capi/tests/test_Arguments.cpp rename to paddle/legacy/capi/tests/test_Arguments.cpp index bb08adf716..6fb379719d 100644 --- a/paddle/capi/tests/test_Arguments.cpp +++ b/paddle/legacy/capi/tests/test_Arguments.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include #include "capi.h" #include "gtest/gtest.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/utils/ThreadLocal.h" static std::vector randomBuffer(size_t bufSize) { auto& eng = paddle::ThreadLocalRandomEngine::get(); diff --git a/paddle/capi/tests/test_GradientMachine.cpp b/paddle/legacy/capi/tests/test_GradientMachine.cpp similarity index 96% rename from paddle/capi/tests/test_GradientMachine.cpp rename to paddle/legacy/capi/tests/test_GradientMachine.cpp index 73b9e477b2..5d1b7cb6ca 100644 --- a/paddle/capi/tests/test_GradientMachine.cpp +++ b/paddle/legacy/capi/tests/test_GradientMachine.cpp @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include +#include +#include #include #include #include #include "capi.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/utils/ThreadLocal.h" static std::vector randomBuffer(size_t bufSize) { auto& eng = paddle::ThreadLocalRandomEngine::get(); diff --git a/paddle/capi/tests/test_Matrix.cpp b/paddle/legacy/capi/tests/test_Matrix.cpp similarity index 100% rename from paddle/capi/tests/test_Matrix.cpp rename to paddle/legacy/capi/tests/test_Matrix.cpp diff --git a/paddle/capi/tests/test_Vector.cpp b/paddle/legacy/capi/tests/test_Vector.cpp similarity index 100% rename from paddle/capi/tests/test_Vector.cpp rename to paddle/legacy/capi/tests/test_Vector.cpp diff --git a/paddle/capi/tests/test_predict_network.py b/paddle/legacy/capi/tests/test_predict_network.py similarity index 100% rename from paddle/capi/tests/test_predict_network.py rename to paddle/legacy/capi/tests/test_predict_network.py diff --git a/paddle/capi/vector.h b/paddle/legacy/capi/vector.h similarity index 100% rename from paddle/capi/vector.h rename to paddle/legacy/capi/vector.h diff --git a/paddle/cuda/CMakeLists.txt b/paddle/legacy/cuda/CMakeLists.txt similarity index 100% rename from paddle/cuda/CMakeLists.txt rename to paddle/legacy/cuda/CMakeLists.txt diff --git a/paddle/cuda/include/hl_activation_functions.h b/paddle/legacy/cuda/include/hl_activation_functions.h similarity index 100% rename from paddle/cuda/include/hl_activation_functions.h rename to paddle/legacy/cuda/include/hl_activation_functions.h diff --git a/paddle/cuda/include/hl_aggregate.h b/paddle/legacy/cuda/include/hl_aggregate.h similarity index 100% rename from paddle/cuda/include/hl_aggregate.h rename to paddle/legacy/cuda/include/hl_aggregate.h diff --git a/paddle/cuda/include/hl_avx_functions.h b/paddle/legacy/cuda/include/hl_avx_functions.h similarity index 100% rename from paddle/cuda/include/hl_avx_functions.h rename to paddle/legacy/cuda/include/hl_avx_functions.h diff --git a/paddle/cuda/include/hl_base.h b/paddle/legacy/cuda/include/hl_base.h similarity index 98% rename from paddle/cuda/include/hl_base.h rename to paddle/legacy/cuda/include/hl_base.h index 77f5d82dbe..bfe812a438 100644 --- a/paddle/cuda/include/hl_base.h +++ b/paddle/legacy/cuda/include/hl_base.h @@ -207,8 +207,8 @@ typedef struct { #ifdef __NVCC__ #include -#include "paddle/cuda/include/hl_cuda.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/cuda/include/hl_cuda.h" +#include "paddle/legacy/utils/Logging.h" extern __thread bool g_sync_flag; extern __thread cudaStream_t default_stream; diff --git a/paddle/cuda/include/hl_batch_norm.h b/paddle/legacy/cuda/include/hl_batch_norm.h similarity index 100% rename from paddle/cuda/include/hl_batch_norm.h rename to paddle/legacy/cuda/include/hl_batch_norm.h diff --git a/paddle/cuda/include/hl_batch_transpose.h b/paddle/legacy/cuda/include/hl_batch_transpose.h similarity index 100% rename from paddle/cuda/include/hl_batch_transpose.h rename to paddle/legacy/cuda/include/hl_batch_transpose.h diff --git a/paddle/cuda/include/hl_cnn.h b/paddle/legacy/cuda/include/hl_cnn.h similarity index 100% rename from paddle/cuda/include/hl_cnn.h rename to paddle/legacy/cuda/include/hl_cnn.h diff --git a/paddle/cuda/include/hl_cpu_gru.cuh b/paddle/legacy/cuda/include/hl_cpu_gru.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_gru.cuh rename to paddle/legacy/cuda/include/hl_cpu_gru.cuh diff --git a/paddle/cuda/include/hl_cpu_lstm.cuh b/paddle/legacy/cuda/include/hl_cpu_lstm.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_lstm.cuh rename to paddle/legacy/cuda/include/hl_cpu_lstm.cuh diff --git a/paddle/cuda/include/hl_cpu_matrix_kernel.cuh b/paddle/legacy/cuda/include/hl_cpu_matrix_kernel.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_matrix_kernel.cuh rename to paddle/legacy/cuda/include/hl_cpu_matrix_kernel.cuh diff --git a/paddle/cuda/include/hl_cpu_matrix_kernel_detail.cuh b/paddle/legacy/cuda/include/hl_cpu_matrix_kernel_detail.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_matrix_kernel_detail.cuh rename to paddle/legacy/cuda/include/hl_cpu_matrix_kernel_detail.cuh diff --git a/paddle/cuda/include/hl_cpu_scalar.cuh b/paddle/legacy/cuda/include/hl_cpu_scalar.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_scalar.cuh rename to paddle/legacy/cuda/include/hl_cpu_scalar.cuh diff --git a/paddle/cuda/include/hl_cpu_simd_neon.cuh b/paddle/legacy/cuda/include/hl_cpu_simd_neon.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_simd_neon.cuh rename to paddle/legacy/cuda/include/hl_cpu_simd_neon.cuh diff --git a/paddle/cuda/include/hl_cpu_simd_sse.cuh b/paddle/legacy/cuda/include/hl_cpu_simd_sse.cuh similarity index 100% rename from paddle/cuda/include/hl_cpu_simd_sse.cuh rename to paddle/legacy/cuda/include/hl_cpu_simd_sse.cuh diff --git a/paddle/cuda/include/hl_cuda.h b/paddle/legacy/cuda/include/hl_cuda.h similarity index 100% rename from paddle/cuda/include/hl_cuda.h rename to paddle/legacy/cuda/include/hl_cuda.h diff --git a/paddle/cuda/include/hl_cuda.ph b/paddle/legacy/cuda/include/hl_cuda.ph similarity index 100% rename from paddle/cuda/include/hl_cuda.ph rename to paddle/legacy/cuda/include/hl_cuda.ph diff --git a/paddle/cuda/include/hl_cuda_cublas.h b/paddle/legacy/cuda/include/hl_cuda_cublas.h similarity index 100% rename from paddle/cuda/include/hl_cuda_cublas.h rename to paddle/legacy/cuda/include/hl_cuda_cublas.h diff --git a/paddle/cuda/include/hl_cuda_cudnn.h b/paddle/legacy/cuda/include/hl_cuda_cudnn.h similarity index 100% rename from paddle/cuda/include/hl_cuda_cudnn.h rename to paddle/legacy/cuda/include/hl_cuda_cudnn.h diff --git a/paddle/cuda/include/hl_cuda_cudnn.ph b/paddle/legacy/cuda/include/hl_cuda_cudnn.ph similarity index 100% rename from paddle/cuda/include/hl_cuda_cudnn.ph rename to paddle/legacy/cuda/include/hl_cuda_cudnn.ph diff --git a/paddle/cuda/include/hl_device_functions.cuh b/paddle/legacy/cuda/include/hl_device_functions.cuh similarity index 100% rename from paddle/cuda/include/hl_device_functions.cuh rename to paddle/legacy/cuda/include/hl_device_functions.cuh diff --git a/paddle/cuda/include/hl_functions.h b/paddle/legacy/cuda/include/hl_functions.h similarity index 100% rename from paddle/cuda/include/hl_functions.h rename to paddle/legacy/cuda/include/hl_functions.h diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/legacy/cuda/include/hl_gpu.h similarity index 100% rename from paddle/cuda/include/hl_gpu.h rename to paddle/legacy/cuda/include/hl_gpu.h diff --git a/paddle/cuda/include/hl_gpu_functions.cuh b/paddle/legacy/cuda/include/hl_gpu_functions.cuh similarity index 100% rename from paddle/cuda/include/hl_gpu_functions.cuh rename to paddle/legacy/cuda/include/hl_gpu_functions.cuh diff --git a/paddle/cuda/include/hl_gpu_gru.cuh b/paddle/legacy/cuda/include/hl_gpu_gru.cuh similarity index 99% rename from paddle/cuda/include/hl_gpu_gru.cuh rename to paddle/legacy/cuda/include/hl_gpu_gru.cuh index 9fcad2c3bc..8d299572c7 100644 --- a/paddle/cuda/include/hl_gpu_gru.cuh +++ b/paddle/legacy/cuda/include/hl_gpu_gru.cuh @@ -18,7 +18,7 @@ limitations under the License. */ #ifdef __NVCC__ -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" /* * threads(framePerBlock, batchPerBlock) diff --git a/paddle/cuda/include/hl_gpu_lstm.cuh b/paddle/legacy/cuda/include/hl_gpu_lstm.cuh similarity index 99% rename from paddle/cuda/include/hl_gpu_lstm.cuh rename to paddle/legacy/cuda/include/hl_gpu_lstm.cuh index 92517a44d2..aae011b838 100644 --- a/paddle/cuda/include/hl_gpu_lstm.cuh +++ b/paddle/legacy/cuda/include/hl_gpu_lstm.cuh @@ -18,7 +18,7 @@ limitations under the License. */ #ifdef __NVCC__ -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "hl_device_functions.cuh" /* diff --git a/paddle/cuda/include/hl_gpu_matrix_kernel.cuh b/paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh similarity index 99% rename from paddle/cuda/include/hl_gpu_matrix_kernel.cuh rename to paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh index 0db023ce37..6177d23657 100644 --- a/paddle/cuda/include/hl_gpu_matrix_kernel.cuh +++ b/paddle/legacy/cuda/include/hl_gpu_matrix_kernel.cuh @@ -18,7 +18,7 @@ limitations under the License. */ #define HL_GPU_MATRIX_KERNEL_CUH_ #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "hl_base.h" #ifdef __NVCC__ diff --git a/paddle/cuda/include/hl_gru_ops.cuh b/paddle/legacy/cuda/include/hl_gru_ops.cuh similarity index 100% rename from paddle/cuda/include/hl_gru_ops.cuh rename to paddle/legacy/cuda/include/hl_gru_ops.cuh diff --git a/paddle/cuda/include/hl_lstm.h b/paddle/legacy/cuda/include/hl_lstm.h similarity index 100% rename from paddle/cuda/include/hl_lstm.h rename to paddle/legacy/cuda/include/hl_lstm.h diff --git a/paddle/cuda/include/hl_lstm_ops.cuh b/paddle/legacy/cuda/include/hl_lstm_ops.cuh similarity index 100% rename from paddle/cuda/include/hl_lstm_ops.cuh rename to paddle/legacy/cuda/include/hl_lstm_ops.cuh diff --git a/paddle/cuda/include/hl_matrix.h b/paddle/legacy/cuda/include/hl_matrix.h similarity index 100% rename from paddle/cuda/include/hl_matrix.h rename to paddle/legacy/cuda/include/hl_matrix.h diff --git a/paddle/cuda/include/hl_matrix_apply.cuh b/paddle/legacy/cuda/include/hl_matrix_apply.cuh similarity index 100% rename from paddle/cuda/include/hl_matrix_apply.cuh rename to paddle/legacy/cuda/include/hl_matrix_apply.cuh diff --git a/paddle/cuda/include/hl_matrix_base.cuh b/paddle/legacy/cuda/include/hl_matrix_base.cuh similarity index 100% rename from paddle/cuda/include/hl_matrix_base.cuh rename to paddle/legacy/cuda/include/hl_matrix_base.cuh diff --git a/paddle/cuda/include/hl_matrix_base_detail.cuh b/paddle/legacy/cuda/include/hl_matrix_base_detail.cuh similarity index 100% rename from paddle/cuda/include/hl_matrix_base_detail.cuh rename to paddle/legacy/cuda/include/hl_matrix_base_detail.cuh diff --git a/paddle/cuda/include/hl_matrix_ops.cuh b/paddle/legacy/cuda/include/hl_matrix_ops.cuh similarity index 100% rename from paddle/cuda/include/hl_matrix_ops.cuh rename to paddle/legacy/cuda/include/hl_matrix_ops.cuh diff --git a/paddle/cuda/include/hl_matrix_type.cuh b/paddle/legacy/cuda/include/hl_matrix_type.cuh similarity index 100% rename from paddle/cuda/include/hl_matrix_type.cuh rename to paddle/legacy/cuda/include/hl_matrix_type.cuh diff --git a/paddle/cuda/include/hl_perturbation_util.cuh b/paddle/legacy/cuda/include/hl_perturbation_util.cuh similarity index 100% rename from paddle/cuda/include/hl_perturbation_util.cuh rename to paddle/legacy/cuda/include/hl_perturbation_util.cuh diff --git a/paddle/cuda/include/hl_recurrent_apply.cuh b/paddle/legacy/cuda/include/hl_recurrent_apply.cuh similarity index 100% rename from paddle/cuda/include/hl_recurrent_apply.cuh rename to paddle/legacy/cuda/include/hl_recurrent_apply.cuh diff --git a/paddle/cuda/include/hl_sequence.h b/paddle/legacy/cuda/include/hl_sequence.h similarity index 100% rename from paddle/cuda/include/hl_sequence.h rename to paddle/legacy/cuda/include/hl_sequence.h diff --git a/paddle/cuda/include/hl_sparse.h b/paddle/legacy/cuda/include/hl_sparse.h similarity index 100% rename from paddle/cuda/include/hl_sparse.h rename to paddle/legacy/cuda/include/hl_sparse.h diff --git a/paddle/cuda/include/hl_sparse.ph b/paddle/legacy/cuda/include/hl_sparse.ph similarity index 100% rename from paddle/cuda/include/hl_sparse.ph rename to paddle/legacy/cuda/include/hl_sparse.ph diff --git a/paddle/cuda/include/hl_table_apply.h b/paddle/legacy/cuda/include/hl_table_apply.h similarity index 100% rename from paddle/cuda/include/hl_table_apply.h rename to paddle/legacy/cuda/include/hl_table_apply.h diff --git a/paddle/cuda/include/hl_tensor_ops.h b/paddle/legacy/cuda/include/hl_tensor_ops.h similarity index 100% rename from paddle/cuda/include/hl_tensor_ops.h rename to paddle/legacy/cuda/include/hl_tensor_ops.h diff --git a/paddle/cuda/include/hl_thread.ph b/paddle/legacy/cuda/include/hl_thread.ph similarity index 100% rename from paddle/cuda/include/hl_thread.ph rename to paddle/legacy/cuda/include/hl_thread.ph diff --git a/paddle/cuda/include/hl_time.h b/paddle/legacy/cuda/include/hl_time.h similarity index 100% rename from paddle/cuda/include/hl_time.h rename to paddle/legacy/cuda/include/hl_time.h diff --git a/paddle/cuda/include/hl_top_k.h b/paddle/legacy/cuda/include/hl_top_k.h similarity index 100% rename from paddle/cuda/include/hl_top_k.h rename to paddle/legacy/cuda/include/hl_top_k.h diff --git a/paddle/cuda/include/hl_warpctc_wrap.h b/paddle/legacy/cuda/include/hl_warpctc_wrap.h similarity index 100% rename from paddle/cuda/include/hl_warpctc_wrap.h rename to paddle/legacy/cuda/include/hl_warpctc_wrap.h diff --git a/paddle/cuda/include/stub/hl_aggregate_stub.h b/paddle/legacy/cuda/include/stub/hl_aggregate_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_aggregate_stub.h rename to paddle/legacy/cuda/include/stub/hl_aggregate_stub.h diff --git a/paddle/cuda/include/stub/hl_cnn_stub.h b/paddle/legacy/cuda/include/stub/hl_cnn_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_cnn_stub.h rename to paddle/legacy/cuda/include/stub/hl_cnn_stub.h diff --git a/paddle/cuda/include/stub/hl_cuda_cublas_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_cublas_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_cuda_cublas_stub.h rename to paddle/legacy/cuda/include/stub/hl_cuda_cublas_stub.h diff --git a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_cudnn_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_cuda_cudnn_stub.h rename to paddle/legacy/cuda/include/stub/hl_cuda_cudnn_stub.h diff --git a/paddle/cuda/include/stub/hl_cuda_stub.h b/paddle/legacy/cuda/include/stub/hl_cuda_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_cuda_stub.h rename to paddle/legacy/cuda/include/stub/hl_cuda_stub.h diff --git a/paddle/cuda/include/stub/hl_lstm_stub.h b/paddle/legacy/cuda/include/stub/hl_lstm_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_lstm_stub.h rename to paddle/legacy/cuda/include/stub/hl_lstm_stub.h diff --git a/paddle/cuda/include/stub/hl_matrix_stub.h b/paddle/legacy/cuda/include/stub/hl_matrix_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_matrix_stub.h rename to paddle/legacy/cuda/include/stub/hl_matrix_stub.h diff --git a/paddle/cuda/include/stub/hl_sequence_stub.h b/paddle/legacy/cuda/include/stub/hl_sequence_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_sequence_stub.h rename to paddle/legacy/cuda/include/stub/hl_sequence_stub.h diff --git a/paddle/cuda/include/stub/hl_sparse_stub.h b/paddle/legacy/cuda/include/stub/hl_sparse_stub.h similarity index 100% rename from paddle/cuda/include/stub/hl_sparse_stub.h rename to paddle/legacy/cuda/include/stub/hl_sparse_stub.h diff --git a/paddle/cuda/src/avx_mathfun.h b/paddle/legacy/cuda/src/avx_mathfun.h similarity index 100% rename from paddle/cuda/src/avx_mathfun.h rename to paddle/legacy/cuda/src/avx_mathfun.h diff --git a/paddle/cuda/src/hl_avx_functions.cc b/paddle/legacy/cuda/src/hl_avx_functions.cc similarity index 100% rename from paddle/cuda/src/hl_avx_functions.cc rename to paddle/legacy/cuda/src/hl_avx_functions.cc diff --git a/paddle/cuda/src/hl_batch_norm.cu b/paddle/legacy/cuda/src/hl_batch_norm.cu similarity index 100% rename from paddle/cuda/src/hl_batch_norm.cu rename to paddle/legacy/cuda/src/hl_batch_norm.cu diff --git a/paddle/cuda/src/hl_batch_transpose.cu b/paddle/legacy/cuda/src/hl_batch_transpose.cu similarity index 100% rename from paddle/cuda/src/hl_batch_transpose.cu rename to paddle/legacy/cuda/src/hl_batch_transpose.cu diff --git a/paddle/cuda/src/hl_cpu_functions.cc b/paddle/legacy/cuda/src/hl_cpu_functions.cc similarity index 100% rename from paddle/cuda/src/hl_cpu_functions.cc rename to paddle/legacy/cuda/src/hl_cpu_functions.cc diff --git a/paddle/cuda/src/hl_cuda_aggregate.cu b/paddle/legacy/cuda/src/hl_cuda_aggregate.cu similarity index 99% rename from paddle/cuda/src/hl_cuda_aggregate.cu rename to paddle/legacy/cuda/src/hl_cuda_aggregate.cu index d30c264127..9831c5ecc3 100644 --- a/paddle/cuda/src/hl_cuda_aggregate.cu +++ b/paddle/legacy/cuda/src/hl_cuda_aggregate.cu @@ -18,7 +18,7 @@ limitations under the License. */ #include "hl_cuda.ph" #include "hl_matrix_base.cuh" #include "hl_thread.ph" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" /** * @brief matrix row operator. diff --git a/paddle/cuda/src/hl_cuda_cnn.cu b/paddle/legacy/cuda/src/hl_cuda_cnn.cu similarity index 100% rename from paddle/cuda/src/hl_cuda_cnn.cu rename to paddle/legacy/cuda/src/hl_cuda_cnn.cu diff --git a/paddle/cuda/src/hl_cuda_cublas.cc b/paddle/legacy/cuda/src/hl_cuda_cublas.cc similarity index 99% rename from paddle/cuda/src/hl_cuda_cublas.cc rename to paddle/legacy/cuda/src/hl_cuda_cublas.cc index 975df42878..283b8b6e9c 100644 --- a/paddle/cuda/src/hl_cuda_cublas.cc +++ b/paddle/legacy/cuda/src/hl_cuda_cublas.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include #include "hl_cuda.h" #include "hl_thread.ph" -#include "paddle/utils/DynamicLoader.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/DynamicLoader.h" +#include "paddle/legacy/utils/Logging.h" namespace dynload { diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/legacy/cuda/src/hl_cuda_cudnn.cc similarity index 99% rename from paddle/cuda/src/hl_cuda_cudnn.cc rename to paddle/legacy/cuda/src/hl_cuda_cudnn.cc index dfa935dcff..b0ac5aaac2 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/legacy/cuda/src/hl_cuda_cudnn.cc @@ -17,8 +17,8 @@ limitations under the License. */ #include #include "hl_cuda_cudnn.ph" #include "hl_thread.ph" -#include "paddle/utils/DynamicLoader.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/DynamicLoader.h" +#include "paddle/legacy/utils/Logging.h" DEFINE_int32(cudnn_conv_workspace_limit_in_mb, 4096, diff --git a/paddle/cuda/src/hl_cuda_device.cc b/paddle/legacy/cuda/src/hl_cuda_device.cc similarity index 99% rename from paddle/cuda/src/hl_cuda_device.cc rename to paddle/legacy/cuda/src/hl_cuda_device.cc index 3025aa4852..501e3b0f3b 100644 --- a/paddle/cuda/src/hl_cuda_device.cc +++ b/paddle/legacy/cuda/src/hl_cuda_device.cc @@ -23,8 +23,8 @@ limitations under the License. */ #include #include "hl_cuda.ph" #include "hl_thread.ph" -#include "paddle/utils/Logging.h" -#include "paddle/utils/DynamicLoader.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/DynamicLoader.h" // clang-format on namespace dynload { diff --git a/paddle/cuda/src/hl_cuda_lstm.cu b/paddle/legacy/cuda/src/hl_cuda_lstm.cu similarity index 99% rename from paddle/cuda/src/hl_cuda_lstm.cu rename to paddle/legacy/cuda/src/hl_cuda_lstm.cu index b8c4e433a1..9ac564fd25 100644 --- a/paddle/cuda/src/hl_cuda_lstm.cu +++ b/paddle/legacy/cuda/src/hl_cuda_lstm.cu @@ -16,7 +16,7 @@ limitations under the License. */ #include "hl_base.h" #include "hl_cuda_cublas.h" #include "hl_device_functions.cuh" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" typedef hppl::Active::forward t_forward; typedef hppl::Active::backward t_backward; diff --git a/paddle/cuda/src/hl_cuda_matrix.cu b/paddle/legacy/cuda/src/hl_cuda_matrix.cu similarity index 99% rename from paddle/cuda/src/hl_cuda_matrix.cu rename to paddle/legacy/cuda/src/hl_cuda_matrix.cu index 3e17c8090c..6fe460026b 100644 --- a/paddle/cuda/src/hl_cuda_matrix.cu +++ b/paddle/legacy/cuda/src/hl_cuda_matrix.cu @@ -20,7 +20,7 @@ limitations under the License. */ #include "hl_matrix_ops.cuh" #include "hl_sequence.h" #include "hl_sparse.ph" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" DEFINE_MATRIX_UNARY_OP(Zero, a = 0); DEFINE_MATRIX_TERNARY_PARAMETER_OP(_add, TWO_PARAMETER, c = p1 * a + p2 * b); diff --git a/paddle/cuda/src/hl_cuda_sequence.cu b/paddle/legacy/cuda/src/hl_cuda_sequence.cu similarity index 99% rename from paddle/cuda/src/hl_cuda_sequence.cu rename to paddle/legacy/cuda/src/hl_cuda_sequence.cu index a3a5f038de..1d772b5ce2 100644 --- a/paddle/cuda/src/hl_cuda_sequence.cu +++ b/paddle/legacy/cuda/src/hl_cuda_sequence.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "hl_base.h" #include "hl_device_functions.cuh" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" __global__ void KeMaxSequenceForward(real* input, const int* sequence, diff --git a/paddle/cuda/src/hl_cuda_sparse.cu b/paddle/legacy/cuda/src/hl_cuda_sparse.cu similarity index 99% rename from paddle/cuda/src/hl_cuda_sparse.cu rename to paddle/legacy/cuda/src/hl_cuda_sparse.cu index 432041fed5..8065a6f9f6 100644 --- a/paddle/cuda/src/hl_cuda_sparse.cu +++ b/paddle/legacy/cuda/src/hl_cuda_sparse.cu @@ -18,7 +18,7 @@ limitations under the License. */ #include "hl_matrix_ops.cuh" #include "hl_sparse.h" #include "hl_sparse.ph" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" DEFINE_MATRIX_UNARY_PARAMETER_OP(mul_scalar, ONE_PARAMETER, a = a * p); DEFINE_MATRIX_UNARY_OP(Zero, a = 0); diff --git a/paddle/cuda/src/hl_cuda_sparse.cuh b/paddle/legacy/cuda/src/hl_cuda_sparse.cuh similarity index 100% rename from paddle/cuda/src/hl_cuda_sparse.cuh rename to paddle/legacy/cuda/src/hl_cuda_sparse.cuh diff --git a/paddle/cuda/src/hl_math.cc b/paddle/legacy/cuda/src/hl_math.cc similarity index 100% rename from paddle/cuda/src/hl_math.cc rename to paddle/legacy/cuda/src/hl_math.cc diff --git a/paddle/cuda/src/hl_perturbation_util.cu b/paddle/legacy/cuda/src/hl_perturbation_util.cu similarity index 100% rename from paddle/cuda/src/hl_perturbation_util.cu rename to paddle/legacy/cuda/src/hl_perturbation_util.cu diff --git a/paddle/cuda/src/hl_table_apply.cu b/paddle/legacy/cuda/src/hl_table_apply.cu similarity index 98% rename from paddle/cuda/src/hl_table_apply.cu rename to paddle/legacy/cuda/src/hl_table_apply.cu index efa4bef02b..7411ae35d3 100644 --- a/paddle/cuda/src/hl_table_apply.cu +++ b/paddle/legacy/cuda/src/hl_table_apply.cu @@ -15,7 +15,7 @@ limitations under the License. */ #include "hl_base.h" #include "hl_cuda.h" #include "hl_device_functions.cuh" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" template __global__ void KeMatrixAddRows(real* output, diff --git a/paddle/cuda/src/hl_time.cc b/paddle/legacy/cuda/src/hl_time.cc similarity index 100% rename from paddle/cuda/src/hl_time.cc rename to paddle/legacy/cuda/src/hl_time.cc diff --git a/paddle/cuda/src/hl_top_k.cu b/paddle/legacy/cuda/src/hl_top_k.cu similarity index 98% rename from paddle/cuda/src/hl_top_k.cu rename to paddle/legacy/cuda/src/hl_top_k.cu index b17290557c..041ac419f5 100644 --- a/paddle/cuda/src/hl_top_k.cu +++ b/paddle/legacy/cuda/src/hl_top_k.cu @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/cuda/include/hl_base.h" -#include "paddle/cuda/include/hl_sparse.ph" -#include "paddle/cuda/include/hl_top_k.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/cuda/include/hl_base.h" +#include "paddle/legacy/cuda/include/hl_sparse.ph" +#include "paddle/legacy/cuda/include/hl_top_k.h" +#include "paddle/legacy/utils/Logging.h" // using namespace hppl; diff --git a/paddle/cuda/src/hl_warpctc_wrap.cc b/paddle/legacy/cuda/src/hl_warpctc_wrap.cc similarity index 98% rename from paddle/cuda/src/hl_warpctc_wrap.cc rename to paddle/legacy/cuda/src/hl_warpctc_wrap.cc index 5111bceaff..31a8652f1f 100644 --- a/paddle/cuda/src/hl_warpctc_wrap.cc +++ b/paddle/legacy/cuda/src/hl_warpctc_wrap.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include "hl_warpctc_wrap.h" #include -#include "paddle/utils/DynamicLoader.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/DynamicLoader.h" +#include "paddle/legacy/utils/Logging.h" namespace dynload { diff --git a/paddle/function/BlockExpandOp.cpp b/paddle/legacy/function/BlockExpandOp.cpp similarity index 100% rename from paddle/function/BlockExpandOp.cpp rename to paddle/legacy/function/BlockExpandOp.cpp diff --git a/paddle/function/BlockExpandOpTest.cpp b/paddle/legacy/function/BlockExpandOpTest.cpp similarity index 100% rename from paddle/function/BlockExpandOpTest.cpp rename to paddle/legacy/function/BlockExpandOpTest.cpp diff --git a/paddle/function/BufferArg.cpp b/paddle/legacy/function/BufferArg.cpp similarity index 97% rename from paddle/function/BufferArg.cpp rename to paddle/legacy/function/BufferArg.cpp index 2dc931c5d7..1f3d505c31 100644 --- a/paddle/function/BufferArg.cpp +++ b/paddle/legacy/function/BufferArg.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include #include "BufferArg.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { diff --git a/paddle/function/BufferArg.h b/paddle/legacy/function/BufferArg.h similarity index 99% rename from paddle/function/BufferArg.h rename to paddle/legacy/function/BufferArg.h index 6de8c94e77..1f47ad556d 100644 --- a/paddle/function/BufferArg.h +++ b/paddle/legacy/function/BufferArg.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "TensorShape.h" #include "TensorType.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/function/BufferArgTest.cpp b/paddle/legacy/function/BufferArgTest.cpp similarity index 96% rename from paddle/function/BufferArgTest.cpp rename to paddle/legacy/function/BufferArgTest.cpp index 1a6e0110af..1ec153bea8 100644 --- a/paddle/function/BufferArgTest.cpp +++ b/paddle/legacy/function/BufferArgTest.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "BufferArg.h" #include -#include "paddle/math/MemoryHandle.h" +#include "paddle/legacy/math/MemoryHandle.h" namespace paddle { diff --git a/paddle/function/CMakeLists.txt b/paddle/legacy/function/CMakeLists.txt similarity index 100% rename from paddle/function/CMakeLists.txt rename to paddle/legacy/function/CMakeLists.txt diff --git a/paddle/function/ContextProjectionOp.cpp b/paddle/legacy/function/ContextProjectionOp.cpp similarity index 99% rename from paddle/function/ContextProjectionOp.cpp rename to paddle/legacy/function/ContextProjectionOp.cpp index 1187842452..05a3f91586 100644 --- a/paddle/function/ContextProjectionOp.cpp +++ b/paddle/legacy/function/ContextProjectionOp.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ContextProjectionOp.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { /** diff --git a/paddle/function/ContextProjectionOp.h b/paddle/legacy/function/ContextProjectionOp.h similarity index 100% rename from paddle/function/ContextProjectionOp.h rename to paddle/legacy/function/ContextProjectionOp.h diff --git a/paddle/function/ContextProjectionOpGpu.cu b/paddle/legacy/function/ContextProjectionOpGpu.cu similarity index 100% rename from paddle/function/ContextProjectionOpGpu.cu rename to paddle/legacy/function/ContextProjectionOpGpu.cu diff --git a/paddle/function/ContextProjectionOpTest.cpp b/paddle/legacy/function/ContextProjectionOpTest.cpp similarity index 99% rename from paddle/function/ContextProjectionOpTest.cpp rename to paddle/legacy/function/ContextProjectionOpTest.cpp index d805c3ae92..3b0a34567f 100644 --- a/paddle/function/ContextProjectionOpTest.cpp +++ b/paddle/legacy/function/ContextProjectionOpTest.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "FunctionTest.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" #include "paddle/testing/TestUtil.h" using namespace paddle; // NOLINT diff --git a/paddle/function/ConvOp.h b/paddle/legacy/function/ConvOp.h similarity index 100% rename from paddle/function/ConvOp.h rename to paddle/legacy/function/ConvOp.h diff --git a/paddle/function/ConvOpTest.h b/paddle/legacy/function/ConvOpTest.h similarity index 100% rename from paddle/function/ConvOpTest.h rename to paddle/legacy/function/ConvOpTest.h diff --git a/paddle/function/CosSimOp.cpp b/paddle/legacy/function/CosSimOp.cpp similarity index 99% rename from paddle/function/CosSimOp.cpp rename to paddle/legacy/function/CosSimOp.cpp index 2c25e1af44..d04f4396ca 100644 --- a/paddle/function/CosSimOp.cpp +++ b/paddle/legacy/function/CosSimOp.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CosSimOp.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { /** diff --git a/paddle/function/CosSimOp.h b/paddle/legacy/function/CosSimOp.h similarity index 100% rename from paddle/function/CosSimOp.h rename to paddle/legacy/function/CosSimOp.h diff --git a/paddle/function/CosSimOpGpu.cu b/paddle/legacy/function/CosSimOpGpu.cu similarity index 100% rename from paddle/function/CosSimOpGpu.cu rename to paddle/legacy/function/CosSimOpGpu.cu diff --git a/paddle/function/CosSimOpTest.cpp b/paddle/legacy/function/CosSimOpTest.cpp similarity index 98% rename from paddle/function/CosSimOpTest.cpp rename to paddle/legacy/function/CosSimOpTest.cpp index 42b02da0cb..31bb43e1ba 100644 --- a/paddle/function/CosSimOpTest.cpp +++ b/paddle/legacy/function/CosSimOpTest.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "FunctionTest.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" using namespace paddle; // NOLINT diff --git a/paddle/function/CropOp.cpp b/paddle/legacy/function/CropOp.cpp similarity index 98% rename from paddle/function/CropOp.cpp rename to paddle/legacy/function/CropOp.cpp index 5bd98910fe..e22678822f 100644 --- a/paddle/function/CropOp.cpp +++ b/paddle/legacy/function/CropOp.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CropOp.h" -#include "paddle/function/TensorShape.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/function/TensorShape.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/function/CropOp.h b/paddle/legacy/function/CropOp.h similarity index 100% rename from paddle/function/CropOp.h rename to paddle/legacy/function/CropOp.h diff --git a/paddle/function/CropOpGpu.cu b/paddle/legacy/function/CropOpGpu.cu similarity index 100% rename from paddle/function/CropOpGpu.cu rename to paddle/legacy/function/CropOpGpu.cu diff --git a/paddle/function/CropOpTest.cpp b/paddle/legacy/function/CropOpTest.cpp similarity index 100% rename from paddle/function/CropOpTest.cpp rename to paddle/legacy/function/CropOpTest.cpp diff --git a/paddle/function/CrossMapNormalOp.cpp b/paddle/legacy/function/CrossMapNormalOp.cpp similarity index 99% rename from paddle/function/CrossMapNormalOp.cpp rename to paddle/legacy/function/CrossMapNormalOp.cpp index 7ff9227e5c..f28703af00 100644 --- a/paddle/function/CrossMapNormalOp.cpp +++ b/paddle/legacy/function/CrossMapNormalOp.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CrossMapNormalOp.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/function/CrossMapNormalOp.h b/paddle/legacy/function/CrossMapNormalOp.h similarity index 100% rename from paddle/function/CrossMapNormalOp.h rename to paddle/legacy/function/CrossMapNormalOp.h diff --git a/paddle/function/CrossMapNormalOpGpu.cu b/paddle/legacy/function/CrossMapNormalOpGpu.cu similarity index 100% rename from paddle/function/CrossMapNormalOpGpu.cu rename to paddle/legacy/function/CrossMapNormalOpGpu.cu diff --git a/paddle/function/CrossMapNormalOpTest.cpp b/paddle/legacy/function/CrossMapNormalOpTest.cpp similarity index 100% rename from paddle/function/CrossMapNormalOpTest.cpp rename to paddle/legacy/function/CrossMapNormalOpTest.cpp diff --git a/paddle/function/DepthwiseConvOp.cpp b/paddle/legacy/function/DepthwiseConvOp.cpp similarity index 100% rename from paddle/function/DepthwiseConvOp.cpp rename to paddle/legacy/function/DepthwiseConvOp.cpp diff --git a/paddle/function/DepthwiseConvOp.h b/paddle/legacy/function/DepthwiseConvOp.h similarity index 100% rename from paddle/function/DepthwiseConvOp.h rename to paddle/legacy/function/DepthwiseConvOp.h diff --git a/paddle/function/DepthwiseConvOpGpu.cu b/paddle/legacy/function/DepthwiseConvOpGpu.cu similarity index 99% rename from paddle/function/DepthwiseConvOpGpu.cu rename to paddle/legacy/function/DepthwiseConvOpGpu.cu index 2c0e71b19b..17138cc563 100644 --- a/paddle/function/DepthwiseConvOpGpu.cu +++ b/paddle/legacy/function/DepthwiseConvOpGpu.cu @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "DepthwiseConvOp.h" -#include "paddle/math/BaseMatrix.h" +#include "paddle/legacy/math/BaseMatrix.h" namespace paddle { diff --git a/paddle/function/DepthwiseConvOpTest.cpp b/paddle/legacy/function/DepthwiseConvOpTest.cpp similarity index 100% rename from paddle/function/DepthwiseConvOpTest.cpp rename to paddle/legacy/function/DepthwiseConvOpTest.cpp diff --git a/paddle/function/EigenGemm.cpp b/paddle/legacy/function/EigenGemm.cpp similarity index 98% rename from paddle/function/EigenGemm.cpp rename to paddle/legacy/function/EigenGemm.cpp index 8e9dbbd7a1..5929c5c68e 100644 --- a/paddle/function/EigenGemm.cpp +++ b/paddle/legacy/function/EigenGemm.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/function/EigenThreadDevice.h" +#include "paddle/legacy/function/EigenThreadDevice.h" namespace paddle { diff --git a/paddle/function/EigenThreadDevice.h b/paddle/legacy/function/EigenThreadDevice.h similarity index 100% rename from paddle/function/EigenThreadDevice.h rename to paddle/legacy/function/EigenThreadDevice.h diff --git a/paddle/function/Function.cpp b/paddle/legacy/function/Function.cpp similarity index 100% rename from paddle/function/Function.cpp rename to paddle/legacy/function/Function.cpp diff --git a/paddle/function/Function.h b/paddle/legacy/function/Function.h similarity index 97% rename from paddle/function/Function.h rename to paddle/legacy/function/Function.h index a6c14ef29b..bc5ef7e6f2 100644 --- a/paddle/function/Function.h +++ b/paddle/legacy/function/Function.h @@ -17,10 +17,10 @@ limitations under the License. */ #include #include #include "BufferArg.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Any.h" -#include "paddle/utils/ClassRegistrar.h" -#include "paddle/utils/Error.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Any.h" +#include "paddle/legacy/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/Error.h" namespace paddle { diff --git a/paddle/function/FunctionTest.cpp b/paddle/legacy/function/FunctionTest.cpp similarity index 99% rename from paddle/function/FunctionTest.cpp rename to paddle/legacy/function/FunctionTest.cpp index f5e6ca3f51..1a0993e313 100644 --- a/paddle/function/FunctionTest.cpp +++ b/paddle/legacy/function/FunctionTest.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "Function.h" #include -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { diff --git a/paddle/function/FunctionTest.h b/paddle/legacy/function/FunctionTest.h similarity index 99% rename from paddle/function/FunctionTest.h rename to paddle/legacy/function/FunctionTest.h index 14003d2c88..6f01981a34 100644 --- a/paddle/function/FunctionTest.h +++ b/paddle/legacy/function/FunctionTest.h @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Function.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" -#include "paddle/math/tests/TensorCheck.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/math/tests/TensorCheck.h" #include "paddle/testing/TestUtil.h" namespace paddle { diff --git a/paddle/function/GemmConvOp.cpp b/paddle/legacy/function/GemmConvOp.cpp similarity index 99% rename from paddle/function/GemmConvOp.cpp rename to paddle/legacy/function/GemmConvOp.cpp index 5b023e2c10..5a81315661 100644 --- a/paddle/function/GemmConvOp.cpp +++ b/paddle/legacy/function/GemmConvOp.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "ConvOp.h" #include "GemmFunctor.h" #include "Im2Col.h" -#include "paddle/math/MemoryHandle.h" +#include "paddle/legacy/math/MemoryHandle.h" namespace paddle { diff --git a/paddle/function/GemmConvOpTest.cpp b/paddle/legacy/function/GemmConvOpTest.cpp similarity index 100% rename from paddle/function/GemmConvOpTest.cpp rename to paddle/legacy/function/GemmConvOpTest.cpp diff --git a/paddle/function/GemmFunctor.cpp b/paddle/legacy/function/GemmFunctor.cpp similarity index 98% rename from paddle/function/GemmFunctor.cpp rename to paddle/legacy/function/GemmFunctor.cpp index 0b1fe1b67d..450293dfee 100644 --- a/paddle/function/GemmFunctor.cpp +++ b/paddle/legacy/function/GemmFunctor.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "GemmFunctor.h" -#include "paddle/math/MathFunctions.h" +#include "paddle/legacy/math/MathFunctions.h" namespace paddle { diff --git a/paddle/function/GemmFunctor.h b/paddle/legacy/function/GemmFunctor.h similarity index 100% rename from paddle/function/GemmFunctor.h rename to paddle/legacy/function/GemmFunctor.h diff --git a/paddle/function/GruFunctor.h b/paddle/legacy/function/GruFunctor.h similarity index 100% rename from paddle/function/GruFunctor.h rename to paddle/legacy/function/GruFunctor.h diff --git a/paddle/function/Im2Col.h b/paddle/legacy/function/Im2Col.h similarity index 100% rename from paddle/function/Im2Col.h rename to paddle/legacy/function/Im2Col.h diff --git a/paddle/function/Im2ColOp.cpp b/paddle/legacy/function/Im2ColOp.cpp similarity index 100% rename from paddle/function/Im2ColOp.cpp rename to paddle/legacy/function/Im2ColOp.cpp diff --git a/paddle/function/Im2ColOpGpu.cu b/paddle/legacy/function/Im2ColOpGpu.cu similarity index 100% rename from paddle/function/Im2ColOpGpu.cu rename to paddle/legacy/function/Im2ColOpGpu.cu diff --git a/paddle/function/Im2ColTest.cpp b/paddle/legacy/function/Im2ColTest.cpp similarity index 99% rename from paddle/function/Im2ColTest.cpp rename to paddle/legacy/function/Im2ColTest.cpp index 967c5b9153..2c5f06f389 100644 --- a/paddle/function/Im2ColTest.cpp +++ b/paddle/legacy/function/Im2ColTest.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #include "Im2Col.h" #include #include "Function.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/tests/TensorCheck.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/tests/TensorCheck.h" namespace paddle { diff --git a/paddle/function/MulOp.cpp b/paddle/legacy/function/MulOp.cpp similarity index 99% rename from paddle/function/MulOp.cpp rename to paddle/legacy/function/MulOp.cpp index 7bf36c8050..750978fc90 100644 --- a/paddle/function/MulOp.cpp +++ b/paddle/legacy/function/MulOp.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "MulOp.h" #include "GemmFunctor.h" -#include "paddle/math/SIMDFunctions.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/SIMDFunctions.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace { inline void vecAddTo(real* a, const real* b, real scaleB, size_t len) { diff --git a/paddle/function/MulOp.h b/paddle/legacy/function/MulOp.h similarity index 97% rename from paddle/function/MulOp.h rename to paddle/legacy/function/MulOp.h index e6057be4e5..ab33bde172 100644 --- a/paddle/function/MulOp.h +++ b/paddle/legacy/function/MulOp.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Function.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { /// CPU, dense matrix (+)= dense matrix * dense matrix diff --git a/paddle/function/MulOpGpu.cu b/paddle/legacy/function/MulOpGpu.cu similarity index 98% rename from paddle/function/MulOpGpu.cu rename to paddle/legacy/function/MulOpGpu.cu index d63416a8e4..217c983cb7 100644 --- a/paddle/function/MulOpGpu.cu +++ b/paddle/legacy/function/MulOpGpu.cu @@ -14,8 +14,8 @@ limitations under the License. */ #include "MulOp.h" #include "hl_base.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { /// dense matrix (+)= dense matrix * dense matrix diff --git a/paddle/function/MulOpTest.cpp b/paddle/legacy/function/MulOpTest.cpp similarity index 98% rename from paddle/function/MulOpTest.cpp rename to paddle/legacy/function/MulOpTest.cpp index 4e1ebd749c..ab08b6f869 100644 --- a/paddle/function/MulOpTest.cpp +++ b/paddle/legacy/function/MulOpTest.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include #include "FunctionTest.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" -#include "paddle/math/tests/test_matrixUtil.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/math/tests/test_matrixUtil.h" #include "paddle/testing/TestUtil.h" using namespace paddle; // NOLINT diff --git a/paddle/function/NaiveConvOp.cpp b/paddle/legacy/function/NaiveConvOp.cpp similarity index 100% rename from paddle/function/NaiveConvOp.cpp rename to paddle/legacy/function/NaiveConvOp.cpp diff --git a/paddle/function/PadOp.cpp b/paddle/legacy/function/PadOp.cpp similarity index 99% rename from paddle/function/PadOp.cpp rename to paddle/legacy/function/PadOp.cpp index 5d7515e8c0..9d011d28e6 100644 --- a/paddle/function/PadOp.cpp +++ b/paddle/legacy/function/PadOp.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "PadOp.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/function/PadOp.h b/paddle/legacy/function/PadOp.h similarity index 100% rename from paddle/function/PadOp.h rename to paddle/legacy/function/PadOp.h diff --git a/paddle/function/PadOpGpu.cu b/paddle/legacy/function/PadOpGpu.cu similarity index 100% rename from paddle/function/PadOpGpu.cu rename to paddle/legacy/function/PadOpGpu.cu diff --git a/paddle/function/PadOpTest.cpp b/paddle/legacy/function/PadOpTest.cpp similarity index 100% rename from paddle/function/PadOpTest.cpp rename to paddle/legacy/function/PadOpTest.cpp diff --git a/paddle/function/RowConvOp.cpp b/paddle/legacy/function/RowConvOp.cpp similarity index 99% rename from paddle/function/RowConvOp.cpp rename to paddle/legacy/function/RowConvOp.cpp index 129e933458..3be50e80d7 100644 --- a/paddle/function/RowConvOp.cpp +++ b/paddle/legacy/function/RowConvOp.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "RowConvOp.h" #include -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/function/RowConvOp.h b/paddle/legacy/function/RowConvOp.h similarity index 100% rename from paddle/function/RowConvOp.h rename to paddle/legacy/function/RowConvOp.h diff --git a/paddle/function/RowConvOpGpu.cu b/paddle/legacy/function/RowConvOpGpu.cu similarity index 99% rename from paddle/function/RowConvOpGpu.cu rename to paddle/legacy/function/RowConvOpGpu.cu index f820ee9a97..a6d2e4c7e3 100644 --- a/paddle/function/RowConvOpGpu.cu +++ b/paddle/legacy/function/RowConvOpGpu.cu @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/cuda/include/hl_base.h" -#include "paddle/function/RowConvOp.h" +#include "paddle/legacy/cuda/include/hl_base.h" +#include "paddle/legacy/function/RowConvOp.h" namespace paddle { diff --git a/paddle/function/RowConvOpTest.cpp b/paddle/legacy/function/RowConvOpTest.cpp similarity index 100% rename from paddle/function/RowConvOpTest.cpp rename to paddle/legacy/function/RowConvOpTest.cpp diff --git a/paddle/function/ScaleSubRegionOp.cpp b/paddle/legacy/function/ScaleSubRegionOp.cpp similarity index 99% rename from paddle/function/ScaleSubRegionOp.cpp rename to paddle/legacy/function/ScaleSubRegionOp.cpp index 9a06ef2a96..03a422a740 100644 --- a/paddle/function/ScaleSubRegionOp.cpp +++ b/paddle/legacy/function/ScaleSubRegionOp.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ScaleSubRegionOp.h" -#include "paddle/function/TensorShape.h" +#include "paddle/legacy/function/TensorShape.h" namespace paddle { diff --git a/paddle/function/ScaleSubRegionOp.h b/paddle/legacy/function/ScaleSubRegionOp.h similarity index 100% rename from paddle/function/ScaleSubRegionOp.h rename to paddle/legacy/function/ScaleSubRegionOp.h diff --git a/paddle/function/ScaleSubRegionOpGpu.cu b/paddle/legacy/function/ScaleSubRegionOpGpu.cu similarity index 100% rename from paddle/function/ScaleSubRegionOpGpu.cu rename to paddle/legacy/function/ScaleSubRegionOpGpu.cu diff --git a/paddle/function/ScaleSubRegionOpTest.cpp b/paddle/legacy/function/ScaleSubRegionOpTest.cpp similarity index 100% rename from paddle/function/ScaleSubRegionOpTest.cpp rename to paddle/legacy/function/ScaleSubRegionOpTest.cpp diff --git a/paddle/function/SwitchOp.cpp b/paddle/legacy/function/SwitchOp.cpp similarity index 99% rename from paddle/function/SwitchOp.cpp rename to paddle/legacy/function/SwitchOp.cpp index 750fb6bf28..c6accd1803 100644 --- a/paddle/function/SwitchOp.cpp +++ b/paddle/legacy/function/SwitchOp.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "SwitchOp.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/function/SwitchOp.h b/paddle/legacy/function/SwitchOp.h similarity index 100% rename from paddle/function/SwitchOp.h rename to paddle/legacy/function/SwitchOp.h diff --git a/paddle/function/SwitchOpGpu.cu b/paddle/legacy/function/SwitchOpGpu.cu similarity index 100% rename from paddle/function/SwitchOpGpu.cu rename to paddle/legacy/function/SwitchOpGpu.cu diff --git a/paddle/function/SwitchOpTest.cpp b/paddle/legacy/function/SwitchOpTest.cpp similarity index 100% rename from paddle/function/SwitchOpTest.cpp rename to paddle/legacy/function/SwitchOpTest.cpp diff --git a/paddle/function/TensorShape.h b/paddle/legacy/function/TensorShape.h similarity index 100% rename from paddle/function/TensorShape.h rename to paddle/legacy/function/TensorShape.h diff --git a/paddle/function/TensorShapeTest.cpp b/paddle/legacy/function/TensorShapeTest.cpp similarity index 100% rename from paddle/function/TensorShapeTest.cpp rename to paddle/legacy/function/TensorShapeTest.cpp diff --git a/paddle/function/TensorType.h b/paddle/legacy/function/TensorType.h similarity index 98% rename from paddle/function/TensorType.h rename to paddle/legacy/function/TensorType.h index b384591bd8..13994821be 100644 --- a/paddle/function/TensorType.h +++ b/paddle/legacy/function/TensorType.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/function/TensorTypeTest.cpp b/paddle/legacy/function/TensorTypeTest.cpp similarity index 100% rename from paddle/function/TensorTypeTest.cpp rename to paddle/legacy/function/TensorTypeTest.cpp diff --git a/paddle/function/neon/NeonDepthwiseConv.cpp b/paddle/legacy/function/neon/NeonDepthwiseConv.cpp similarity index 98% rename from paddle/function/neon/NeonDepthwiseConv.cpp rename to paddle/legacy/function/neon/NeonDepthwiseConv.cpp index d7ac83da41..6179635a9f 100644 --- a/paddle/function/neon/NeonDepthwiseConv.cpp +++ b/paddle/legacy/function/neon/NeonDepthwiseConv.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "NeonDepthwiseConv.h" -#include "paddle/function/ConvOp.h" +#include "paddle/legacy/function/ConvOp.h" namespace paddle { diff --git a/paddle/function/neon/NeonDepthwiseConv.h b/paddle/legacy/function/neon/NeonDepthwiseConv.h similarity index 100% rename from paddle/function/neon/NeonDepthwiseConv.h rename to paddle/legacy/function/neon/NeonDepthwiseConv.h diff --git a/paddle/function/neon/NeonDepthwiseConvTranspose.cpp b/paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp similarity index 99% rename from paddle/function/neon/NeonDepthwiseConvTranspose.cpp rename to paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp index 1fc5daf607..feb77e1ff9 100644 --- a/paddle/function/neon/NeonDepthwiseConvTranspose.cpp +++ b/paddle/legacy/function/neon/NeonDepthwiseConvTranspose.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "NeonDepthwiseConv.h" -#include "paddle/function/ConvOp.h" +#include "paddle/legacy/function/ConvOp.h" namespace paddle { diff --git a/paddle/function/neon/neon_util.h b/paddle/legacy/function/neon/neon_util.h similarity index 100% rename from paddle/function/neon/neon_util.h rename to paddle/legacy/function/neon/neon_util.h diff --git a/paddle/function/nnpack/NNPACKConvOp.cpp b/paddle/legacy/function/nnpack/NNPACKConvOp.cpp similarity index 99% rename from paddle/function/nnpack/NNPACKConvOp.cpp rename to paddle/legacy/function/nnpack/NNPACKConvOp.cpp index 48c997b50d..81c832e774 100644 --- a/paddle/function/nnpack/NNPACKConvOp.cpp +++ b/paddle/legacy/function/nnpack/NNPACKConvOp.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "nnpack.h" -#include "paddle/function/ConvOp.h" +#include "paddle/legacy/function/ConvOp.h" DEFINE_bool(nnpack_allocate_outside, true, diff --git a/paddle/function/nnpack/NNPACKConvOpTest.cpp b/paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp similarity index 95% rename from paddle/function/nnpack/NNPACKConvOpTest.cpp rename to paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp index c80ffb5d5d..a2db83f5a3 100644 --- a/paddle/function/nnpack/NNPACKConvOpTest.cpp +++ b/paddle/legacy/function/nnpack/NNPACKConvOpTest.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/function/ConvOpTest.h" +#include "paddle/legacy/function/ConvOpTest.h" namespace paddle { diff --git a/paddle/gserver/CMakeLists.txt b/paddle/legacy/gserver/CMakeLists.txt similarity index 100% rename from paddle/gserver/CMakeLists.txt rename to paddle/legacy/gserver/CMakeLists.txt diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/legacy/gserver/activations/ActivationFunction.cpp similarity index 98% rename from paddle/gserver/activations/ActivationFunction.cpp rename to paddle/legacy/gserver/activations/ActivationFunction.cpp index 71c238fbfe..ae07c7e6d7 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/legacy/gserver/activations/ActivationFunction.cpp @@ -20,9 +20,9 @@ limitations under the License. */ #include #include #include -#include "paddle/parameter/Argument.h" -#include "paddle/utils/ClassRegistrar.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/parameter/Argument.h" +#include "paddle/legacy/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/Logging.h" #ifdef PADDLE_WITH_MKLDNN #include "MKLDNNActivation.h" diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/legacy/gserver/activations/ActivationFunction.h similarity index 98% rename from paddle/gserver/activations/ActivationFunction.h rename to paddle/legacy/gserver/activations/ActivationFunction.h index 8e2e144769..8bc5b0f529 100644 --- a/paddle/gserver/activations/ActivationFunction.h +++ b/paddle/legacy/gserver/activations/ActivationFunction.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include #include -#include "paddle/utils/Error.h" +#include "paddle/legacy/utils/Error.h" namespace paddle { diff --git a/paddle/gserver/activations/MKLDNNActivation.cpp b/paddle/legacy/gserver/activations/MKLDNNActivation.cpp similarity index 99% rename from paddle/gserver/activations/MKLDNNActivation.cpp rename to paddle/legacy/gserver/activations/MKLDNNActivation.cpp index 672444c656..2eed7af70a 100644 --- a/paddle/gserver/activations/MKLDNNActivation.cpp +++ b/paddle/legacy/gserver/activations/MKLDNNActivation.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "MKLDNNActivation.h" #include "mkldnn.hpp" -#include "paddle/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/ClassRegistrar.h" namespace paddle { diff --git a/paddle/gserver/activations/MKLDNNActivation.h b/paddle/legacy/gserver/activations/MKLDNNActivation.h similarity index 96% rename from paddle/gserver/activations/MKLDNNActivation.h rename to paddle/legacy/gserver/activations/MKLDNNActivation.h index eece1b9c37..59c447ad07 100644 --- a/paddle/gserver/activations/MKLDNNActivation.h +++ b/paddle/legacy/gserver/activations/MKLDNNActivation.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include "ActivationFunction.h" #include "mkldnn.hpp" -#include "paddle/gserver/layers/MKLDNNBase.h" -#include "paddle/math/MKLDNNMatrix.h" -#include "paddle/parameter/Argument.h" +#include "paddle/legacy/gserver/layers/MKLDNNBase.h" +#include "paddle/legacy/math/MKLDNNMatrix.h" +#include "paddle/legacy/parameter/Argument.h" namespace paddle { diff --git a/paddle/gserver/dataproviders/DataProvider.cpp b/paddle/legacy/gserver/dataproviders/DataProvider.cpp similarity index 98% rename from paddle/gserver/dataproviders/DataProvider.cpp rename to paddle/legacy/gserver/dataproviders/DataProvider.cpp index 580cf821c6..b67af8a326 100644 --- a/paddle/gserver/dataproviders/DataProvider.cpp +++ b/paddle/legacy/gserver/dataproviders/DataProvider.cpp @@ -16,10 +16,10 @@ limitations under the License. */ #include #include -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/StringUtil.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/dataproviders/DataProvider.h b/paddle/legacy/gserver/dataproviders/DataProvider.h similarity index 96% rename from paddle/gserver/dataproviders/DataProvider.h rename to paddle/legacy/gserver/dataproviders/DataProvider.h index 21822b10c2..c2e1c5fdd6 100644 --- a/paddle/gserver/dataproviders/DataProvider.h +++ b/paddle/legacy/gserver/dataproviders/DataProvider.h @@ -25,17 +25,17 @@ limitations under the License. */ #include #include "DataConfig.pb.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" -#include "paddle/math/Vector.h" -#include "paddle/parameter/Argument.h" -#include "paddle/utils/ClassRegistrar.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Queue.h" -#include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Argument.h" +#include "paddle/legacy/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Queue.h" +#include "paddle/legacy/utils/ThreadLocal.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { /** diff --git a/paddle/gserver/dataproviders/DataProviderGroup.h b/paddle/legacy/gserver/dataproviders/DataProviderGroup.h similarity index 100% rename from paddle/gserver/dataproviders/DataProviderGroup.h rename to paddle/legacy/gserver/dataproviders/DataProviderGroup.h diff --git a/paddle/gserver/dataproviders/MultiDataProvider.cpp b/paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp similarity index 98% rename from paddle/gserver/dataproviders/MultiDataProvider.cpp rename to paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp index f71947ef39..e5fc6d8a88 100644 --- a/paddle/gserver/dataproviders/MultiDataProvider.cpp +++ b/paddle/legacy/gserver/dataproviders/MultiDataProvider.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "MultiDataProvider.h" #include -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/dataproviders/MultiDataProvider.h b/paddle/legacy/gserver/dataproviders/MultiDataProvider.h similarity index 100% rename from paddle/gserver/dataproviders/MultiDataProvider.h rename to paddle/legacy/gserver/dataproviders/MultiDataProvider.h diff --git a/paddle/gserver/dataproviders/ProtoReader.h b/paddle/legacy/gserver/dataproviders/ProtoReader.h similarity index 100% rename from paddle/gserver/dataproviders/ProtoReader.h rename to paddle/legacy/gserver/dataproviders/ProtoReader.h diff --git a/paddle/gserver/dataproviders/PyDataProvider.cpp b/paddle/legacy/gserver/dataproviders/PyDataProvider.cpp similarity index 99% rename from paddle/gserver/dataproviders/PyDataProvider.cpp rename to paddle/legacy/gserver/dataproviders/PyDataProvider.cpp index dadf1b4cf2..0827bd39d4 100644 --- a/paddle/gserver/dataproviders/PyDataProvider.cpp +++ b/paddle/legacy/gserver/dataproviders/PyDataProvider.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "PyDataProvider.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/dataproviders/PyDataProvider.h b/paddle/legacy/gserver/dataproviders/PyDataProvider.h similarity index 99% rename from paddle/gserver/dataproviders/PyDataProvider.h rename to paddle/legacy/gserver/dataproviders/PyDataProvider.h index da50dd4e2e..4b8bea04a1 100644 --- a/paddle/gserver/dataproviders/PyDataProvider.h +++ b/paddle/legacy/gserver/dataproviders/PyDataProvider.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include +#include #include "DataFormat.pb.h" #include "DataProvider.h" diff --git a/paddle/gserver/dataproviders/PyDataProvider2.cpp b/paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp similarity index 99% rename from paddle/gserver/dataproviders/PyDataProvider2.cpp rename to paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp index 54ee091e8f..8e931e4061 100644 --- a/paddle/gserver/dataproviders/PyDataProvider2.cpp +++ b/paddle/legacy/gserver/dataproviders/PyDataProvider2.cpp @@ -25,9 +25,9 @@ limitations under the License. */ #include "DataProvider.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp b/paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp similarity index 98% rename from paddle/gserver/evaluators/CTCErrorEvaluator.cpp rename to paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp index c6cd41de9a..c145adda5e 100644 --- a/paddle/gserver/evaluators/CTCErrorEvaluator.cpp +++ b/paddle/legacy/gserver/evaluators/CTCErrorEvaluator.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Evaluator.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/utils/StringUtil.h" namespace paddle { diff --git a/paddle/gserver/evaluators/ChunkEvaluator.cpp b/paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp similarity index 99% rename from paddle/gserver/evaluators/ChunkEvaluator.cpp rename to paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp index a2216293b1..0ff3f2fa8c 100644 --- a/paddle/gserver/evaluators/ChunkEvaluator.cpp +++ b/paddle/legacy/gserver/evaluators/ChunkEvaluator.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #include #include -#include "paddle/math/Vector.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/StringUtil.h" #include "Evaluator.h" diff --git a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp b/paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp similarity index 99% rename from paddle/gserver/evaluators/DetectionMAPEvaluator.cpp rename to paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp index ddb8ebca78..57657241f8 100644 --- a/paddle/gserver/evaluators/DetectionMAPEvaluator.cpp +++ b/paddle/legacy/gserver/evaluators/DetectionMAPEvaluator.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Evaluator.h" -#include "paddle/gserver/layers/DetectionUtil.h" +#include "paddle/legacy/gserver/layers/DetectionUtil.h" using std::map; using std::vector; diff --git a/paddle/gserver/evaluators/Evaluator.cpp b/paddle/legacy/gserver/evaluators/Evaluator.cpp similarity index 99% rename from paddle/gserver/evaluators/Evaluator.cpp rename to paddle/legacy/gserver/evaluators/Evaluator.cpp index 941fb8fb53..a956f40d02 100644 --- a/paddle/gserver/evaluators/Evaluator.cpp +++ b/paddle/legacy/gserver/evaluators/Evaluator.cpp @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/gserver/evaluators/Evaluator.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/gserver/evaluators/Evaluator.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/StringUtil.h" DECLARE_int32(trainer_id); diff --git a/paddle/gserver/evaluators/Evaluator.h b/paddle/legacy/gserver/evaluators/Evaluator.h similarity index 98% rename from paddle/gserver/evaluators/Evaluator.h rename to paddle/legacy/gserver/evaluators/Evaluator.h index 42948f1097..b3462819b1 100644 --- a/paddle/gserver/evaluators/Evaluator.h +++ b/paddle/legacy/gserver/evaluators/Evaluator.h @@ -16,10 +16,10 @@ limitations under the License. */ #include #include "ModelConfig.pb.h" -#include "paddle/parameter/Argument.h" -#include "paddle/pserver/ParameterClient2.h" -#include "paddle/utils/ClassRegistrar.h" -#include "paddle/utils/Error.h" +#include "paddle/legacy/parameter/Argument.h" +#include "paddle/legacy/pserver/ParameterClient2.h" +#include "paddle/legacy/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/Error.h" namespace paddle { diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/GradientMachine.cpp similarity index 98% rename from paddle/gserver/gradientmachines/GradientMachine.cpp rename to paddle/legacy/gserver/gradientmachines/GradientMachine.cpp index 654024e8a4..1c4034d8bb 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/legacy/gserver/gradientmachines/GradientMachine.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "GradientMachine.h" #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "NeuralNetwork.h" #include "hl_gpu.h" diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/legacy/gserver/gradientmachines/GradientMachine.h similarity index 95% rename from paddle/gserver/gradientmachines/GradientMachine.h rename to paddle/legacy/gserver/gradientmachines/GradientMachine.h index 22cf5d265f..d4f754a9f4 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/legacy/gserver/gradientmachines/GradientMachine.h @@ -19,15 +19,15 @@ limitations under the License. */ #include "ModelConfig.pb.h" #include "TrainerConfig.pb.h" -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/gserver/layers/Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/ParameterUpdaterBase.h" -#include "paddle/utils/Thread.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/gserver/layers/Layer.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/ParameterUpdaterBase.h" +#include "paddle/legacy/utils/Thread.h" #ifndef PADDLE_MOBILE_INFERENCE -#include "paddle/gserver/evaluators/Evaluator.h" +#include "paddle/legacy/gserver/evaluators/Evaluator.h" #endif namespace paddle { diff --git a/paddle/gserver/gradientmachines/GradientMachineMode.cpp b/paddle/legacy/gserver/gradientmachines/GradientMachineMode.cpp similarity index 100% rename from paddle/gserver/gradientmachines/GradientMachineMode.cpp rename to paddle/legacy/gserver/gradientmachines/GradientMachineMode.cpp diff --git a/paddle/gserver/gradientmachines/GradientMachineMode.h b/paddle/legacy/gserver/gradientmachines/GradientMachineMode.h similarity index 100% rename from paddle/gserver/gradientmachines/GradientMachineMode.h rename to paddle/legacy/gserver/gradientmachines/GradientMachineMode.h diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp similarity index 99% rename from paddle/gserver/gradientmachines/MultiGradientMachine.cpp rename to paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp index b8d4d28f0f..3ef0dfbfe2 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.cpp +++ b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include "MultiGradientMachine.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" #include "NeuralNetwork.h" #include "ParallelNeuralNetwork.h" @@ -532,6 +532,7 @@ void TrainerThread::computeThread() { break; } } + hl_fini(); } void TrainerThread::prefetch() { @@ -651,6 +652,7 @@ void TrainerThread::copyGradToBufferThread() { } partnerThread->notifyGradientCollect(pid); } + hl_fini(); } void TrainerThread::gradCollectThread() { @@ -693,6 +695,7 @@ void TrainerThread::gradCollectThread() { notifyCopyGradToBuffer(pid); } } + hl_fini(); } void TrainerThread::doCallback(int pid) { @@ -741,6 +744,7 @@ void TrainerThread::valueDispatchThread() { thread->notifyValueReady(pid); } + hl_fini(); } void TrainerThread::notifyValueReady(int paramId) { diff --git a/paddle/gserver/gradientmachines/MultiGradientMachine.h b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h similarity index 99% rename from paddle/gserver/gradientmachines/MultiGradientMachine.h rename to paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h index eff7d5284c..674acd4124 100644 --- a/paddle/gserver/gradientmachines/MultiGradientMachine.h +++ b/paddle/legacy/gserver/gradientmachines/MultiGradientMachine.h @@ -19,8 +19,8 @@ limitations under the License. */ #include "GradientMachine.h" #include "hl_gpu.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/Queue.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Queue.h" namespace paddle { diff --git a/paddle/gserver/gradientmachines/MultiNetwork.cpp b/paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp similarity index 98% rename from paddle/gserver/gradientmachines/MultiNetwork.cpp rename to paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp index 5f3d09dda2..1245c44103 100644 --- a/paddle/gserver/gradientmachines/MultiNetwork.cpp +++ b/paddle/legacy/gserver/gradientmachines/MultiNetwork.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "MultiNetwork.h" diff --git a/paddle/gserver/gradientmachines/MultiNetwork.h b/paddle/legacy/gserver/gradientmachines/MultiNetwork.h similarity index 98% rename from paddle/gserver/gradientmachines/MultiNetwork.h rename to paddle/legacy/gserver/gradientmachines/MultiNetwork.h index 495d559201..afe15cb020 100644 --- a/paddle/gserver/gradientmachines/MultiNetwork.h +++ b/paddle/legacy/gserver/gradientmachines/MultiNetwork.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "GradientMachine.h" #include "NeuralNetwork.h" -#include "paddle/utils/Locks.h" +#include "paddle/legacy/utils/Locks.h" namespace paddle { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp similarity index 98% rename from paddle/gserver/gradientmachines/NeuralNetwork.cpp rename to paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp index ac60a3a340..0f8048152f 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.cpp @@ -12,22 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include "NeuralNetwork.h" #include "hl_gpu.h" -#include "paddle/utils/CustomStackTrace.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/CustomStackTrace.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" #ifdef PADDLE_WITH_MKLDNN -#include "paddle/gserver/layers/MKLDNNLayer.h" +#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" #endif #ifndef PADDLE_MOBILE_INFERENCE #include "MultiNetwork.h" #include "RecurrentGradientMachine.h" -#include "paddle/gserver/layers/AgentLayer.h" +#include "paddle/legacy/gserver/layers/AgentLayer.h" #endif namespace paddle { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.h b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.h similarity index 93% rename from paddle/gserver/gradientmachines/NeuralNetwork.h rename to paddle/legacy/gserver/gradientmachines/NeuralNetwork.h index 3e5615c8f0..566157c899 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.h +++ b/paddle/legacy/gserver/gradientmachines/NeuralNetwork.h @@ -19,13 +19,13 @@ limitations under the License. */ #include #include "ModelConfig.pb.h" -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" -#include "paddle/gserver/layers/CostLayer.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/gserver/layers/Layer.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/utils/ClassRegistrar.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/layers/CostLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/Layer.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/utils/ClassRegistrar.h" namespace paddle { /* diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp similarity index 98% rename from paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp rename to paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp index 85cfc59fbe..33d24b5b83 100644 --- a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.cpp +++ b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "ParallelNeuralNetwork.h" @@ -197,6 +197,7 @@ void ParallelThread::computeThread() { job_work.layer_->markAllInputGrad(); } } + hl_fini(); } void ParallelThread::start() { diff --git a/paddle/gserver/gradientmachines/ParallelNeuralNetwork.h b/paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.h similarity index 100% rename from paddle/gserver/gradientmachines/ParallelNeuralNetwork.h rename to paddle/legacy/gserver/gradientmachines/ParallelNeuralNetwork.h diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp similarity index 99% rename from paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp rename to paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp index 73ac8cda72..e49f042404 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.cpp +++ b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.cpp @@ -19,10 +19,10 @@ limitations under the License. */ #include #include #include "NeuralNetwork.h" -#include "paddle/gserver/layers/AgentLayer.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/layers/AgentLayer.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" DEFINE_string(diy_beam_search_prob_so, "", "the diy beam search cost so"); diff --git a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h similarity index 99% rename from paddle/gserver/gradientmachines/RecurrentGradientMachine.h rename to paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h index 7e943cebd3..0a13d4f6f8 100644 --- a/paddle/gserver/gradientmachines/RecurrentGradientMachine.h +++ b/paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "GradientMachine.h" #include "NeuralNetwork.h" -#include "paddle/utils/Locks.h" +#include "paddle/legacy/utils/Locks.h" namespace paddle { diff --git a/paddle/gserver/layers/AddtoLayer.cpp b/paddle/legacy/gserver/layers/AddtoLayer.cpp similarity index 96% rename from paddle/gserver/layers/AddtoLayer.cpp rename to paddle/legacy/gserver/layers/AddtoLayer.cpp index 75e17f52df..39c5603d93 100644 --- a/paddle/gserver/layers/AddtoLayer.cpp +++ b/paddle/legacy/gserver/layers/AddtoLayer.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include "AddtoLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/AddtoLayer.h b/paddle/legacy/gserver/layers/AddtoLayer.h similarity index 95% rename from paddle/gserver/layers/AddtoLayer.h rename to paddle/legacy/gserver/layers/AddtoLayer.h index 6ea54f4a53..ad3cefe1a4 100644 --- a/paddle/gserver/layers/AddtoLayer.h +++ b/paddle/legacy/gserver/layers/AddtoLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/AgentLayer.cpp b/paddle/legacy/gserver/layers/AgentLayer.cpp similarity index 99% rename from paddle/gserver/layers/AgentLayer.cpp rename to paddle/legacy/gserver/layers/AgentLayer.cpp index e2f73f88f5..bae89b2fa3 100644 --- a/paddle/gserver/layers/AgentLayer.cpp +++ b/paddle/legacy/gserver/layers/AgentLayer.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include "AgentLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/AgentLayer.h b/paddle/legacy/gserver/layers/AgentLayer.h similarity index 98% rename from paddle/gserver/layers/AgentLayer.h rename to paddle/legacy/gserver/layers/AgentLayer.h index 51f346d5c9..a05eac5e70 100644 --- a/paddle/gserver/layers/AgentLayer.h +++ b/paddle/legacy/gserver/layers/AgentLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/AverageLayer.cpp b/paddle/legacy/gserver/layers/AverageLayer.cpp similarity index 96% rename from paddle/gserver/layers/AverageLayer.cpp rename to paddle/legacy/gserver/layers/AverageLayer.cpp index b3787b1448..0539da7937 100644 --- a/paddle/gserver/layers/AverageLayer.cpp +++ b/paddle/legacy/gserver/layers/AverageLayer.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include "AverageLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/AverageLayer.h b/paddle/legacy/gserver/layers/AverageLayer.h similarity index 98% rename from paddle/gserver/layers/AverageLayer.h rename to paddle/legacy/gserver/layers/AverageLayer.h index 03e2673b55..a0d457d35f 100644 --- a/paddle/gserver/layers/AverageLayer.h +++ b/paddle/legacy/gserver/layers/AverageLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "SequencePoolLayer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp similarity index 98% rename from paddle/gserver/layers/BatchNormBaseLayer.cpp rename to paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp index a3516f9423..4dcbd8dc27 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/legacy/gserver/layers/BatchNormBaseLayer.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "BatchNormBaseLayer.h" #include "BatchNormalizationLayer.h" #include "Layer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" #ifdef PADDLE_WITH_CUDA #include "CudnnBatchNormLayer.h" #endif diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/legacy/gserver/layers/BatchNormBaseLayer.h similarity index 98% rename from paddle/gserver/layers/BatchNormBaseLayer.h rename to paddle/legacy/gserver/layers/BatchNormBaseLayer.h index 5a446c0843..8dc1d78837 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/legacy/gserver/layers/BatchNormBaseLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/BatchNormalizationLayer.cpp b/paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp similarity index 99% rename from paddle/gserver/layers/BatchNormalizationLayer.cpp rename to paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp index 59831dd904..0297bd44c7 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.cpp +++ b/paddle/legacy/gserver/layers/BatchNormalizationLayer.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" #ifdef PADDLE_WITH_CUDA #include "hl_batch_transpose.h" #endif diff --git a/paddle/gserver/layers/BatchNormalizationLayer.h b/paddle/legacy/gserver/layers/BatchNormalizationLayer.h similarity index 100% rename from paddle/gserver/layers/BatchNormalizationLayer.h rename to paddle/legacy/gserver/layers/BatchNormalizationLayer.h diff --git a/paddle/gserver/layers/BilinearInterpLayer.cpp b/paddle/legacy/gserver/layers/BilinearInterpLayer.cpp similarity index 97% rename from paddle/gserver/layers/BilinearInterpLayer.cpp rename to paddle/legacy/gserver/layers/BilinearInterpLayer.cpp index 9775914596..a091f51bc2 100644 --- a/paddle/gserver/layers/BilinearInterpLayer.cpp +++ b/paddle/legacy/gserver/layers/BilinearInterpLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "BilinearInterpLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/BilinearInterpLayer.h b/paddle/legacy/gserver/layers/BilinearInterpLayer.h similarity index 97% rename from paddle/gserver/layers/BilinearInterpLayer.h rename to paddle/legacy/gserver/layers/BilinearInterpLayer.h index 8e08c2e1ce..c585a5ed10 100644 --- a/paddle/gserver/layers/BilinearInterpLayer.h +++ b/paddle/legacy/gserver/layers/BilinearInterpLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/BlockExpandLayer.cpp b/paddle/legacy/gserver/layers/BlockExpandLayer.cpp similarity index 99% rename from paddle/gserver/layers/BlockExpandLayer.cpp rename to paddle/legacy/gserver/layers/BlockExpandLayer.cpp index 793d24e884..24b5af67d4 100644 --- a/paddle/gserver/layers/BlockExpandLayer.cpp +++ b/paddle/legacy/gserver/layers/BlockExpandLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "BlockExpandLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/gserver/layers/BlockExpandLayer.h b/paddle/legacy/gserver/layers/BlockExpandLayer.h similarity index 98% rename from paddle/gserver/layers/BlockExpandLayer.h rename to paddle/legacy/gserver/layers/BlockExpandLayer.h index 9d76584f3a..8b90249bfb 100644 --- a/paddle/gserver/layers/BlockExpandLayer.h +++ b/paddle/legacy/gserver/layers/BlockExpandLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/CRFDecodingLayer.cpp b/paddle/legacy/gserver/layers/CRFDecodingLayer.cpp similarity index 100% rename from paddle/gserver/layers/CRFDecodingLayer.cpp rename to paddle/legacy/gserver/layers/CRFDecodingLayer.cpp diff --git a/paddle/gserver/layers/CRFDecodingLayer.h b/paddle/legacy/gserver/layers/CRFDecodingLayer.h similarity index 100% rename from paddle/gserver/layers/CRFDecodingLayer.h rename to paddle/legacy/gserver/layers/CRFDecodingLayer.h diff --git a/paddle/gserver/layers/CRFLayer.cpp b/paddle/legacy/gserver/layers/CRFLayer.cpp similarity index 100% rename from paddle/gserver/layers/CRFLayer.cpp rename to paddle/legacy/gserver/layers/CRFLayer.cpp diff --git a/paddle/gserver/layers/CRFLayer.h b/paddle/legacy/gserver/layers/CRFLayer.h similarity index 100% rename from paddle/gserver/layers/CRFLayer.h rename to paddle/legacy/gserver/layers/CRFLayer.h diff --git a/paddle/gserver/layers/CTCLayer.cpp b/paddle/legacy/gserver/layers/CTCLayer.cpp similarity index 100% rename from paddle/gserver/layers/CTCLayer.cpp rename to paddle/legacy/gserver/layers/CTCLayer.cpp diff --git a/paddle/gserver/layers/CTCLayer.h b/paddle/legacy/gserver/layers/CTCLayer.h similarity index 100% rename from paddle/gserver/layers/CTCLayer.h rename to paddle/legacy/gserver/layers/CTCLayer.h diff --git a/paddle/gserver/layers/ClipLayer.cpp b/paddle/legacy/gserver/layers/ClipLayer.cpp similarity index 100% rename from paddle/gserver/layers/ClipLayer.cpp rename to paddle/legacy/gserver/layers/ClipLayer.cpp diff --git a/paddle/gserver/layers/ConcatenateLayer.cpp b/paddle/legacy/gserver/layers/ConcatenateLayer.cpp similarity index 99% rename from paddle/gserver/layers/ConcatenateLayer.cpp rename to paddle/legacy/gserver/layers/ConcatenateLayer.cpp index e6de329ff3..ce3f2ca950 100644 --- a/paddle/gserver/layers/ConcatenateLayer.cpp +++ b/paddle/legacy/gserver/layers/ConcatenateLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "Layer.h" #include "Projection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ContextProjection.cpp b/paddle/legacy/gserver/layers/ContextProjection.cpp similarity index 99% rename from paddle/gserver/layers/ContextProjection.cpp rename to paddle/legacy/gserver/layers/ContextProjection.cpp index 10c3cef0da..8bcf32663e 100644 --- a/paddle/gserver/layers/ContextProjection.cpp +++ b/paddle/legacy/gserver/layers/ContextProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ContextProjection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ContextProjection.h b/paddle/legacy/gserver/layers/ContextProjection.h similarity index 100% rename from paddle/gserver/layers/ContextProjection.h rename to paddle/legacy/gserver/layers/ContextProjection.h diff --git a/paddle/gserver/layers/Conv3DLayer.cpp b/paddle/legacy/gserver/layers/Conv3DLayer.cpp similarity index 99% rename from paddle/gserver/layers/Conv3DLayer.cpp rename to paddle/legacy/gserver/layers/Conv3DLayer.cpp index b38de86b15..d072a74234 100644 --- a/paddle/gserver/layers/Conv3DLayer.cpp +++ b/paddle/legacy/gserver/layers/Conv3DLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Conv3DLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/Conv3DLayer.h b/paddle/legacy/gserver/layers/Conv3DLayer.h similarity index 94% rename from paddle/gserver/layers/Conv3DLayer.h rename to paddle/legacy/gserver/layers/Conv3DLayer.h index 07b804bad0..cb42a2f36d 100644 --- a/paddle/gserver/layers/Conv3DLayer.h +++ b/paddle/legacy/gserver/layers/Conv3DLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include #include "ConvBaseLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/legacy/gserver/layers/ConvBaseLayer.cpp similarity index 98% rename from paddle/gserver/layers/ConvBaseLayer.cpp rename to paddle/legacy/gserver/layers/ConvBaseLayer.cpp index 56bf4f9fcb..76120915e4 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/legacy/gserver/layers/ConvBaseLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvBaseLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { bool ConvBaseLayer::init(const LayerMap& layerMap, diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/legacy/gserver/layers/ConvBaseLayer.h similarity index 98% rename from paddle/gserver/layers/ConvBaseLayer.h rename to paddle/legacy/gserver/layers/ConvBaseLayer.h index 801bc4f888..01e90e9996 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/legacy/gserver/layers/ConvBaseLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" namespace paddle { /** diff --git a/paddle/gserver/layers/ConvBaseOperator.cpp b/paddle/legacy/gserver/layers/ConvBaseOperator.cpp similarity index 98% rename from paddle/gserver/layers/ConvBaseOperator.cpp rename to paddle/legacy/gserver/layers/ConvBaseOperator.cpp index 317e7d5c60..e8e59b3bfe 100644 --- a/paddle/gserver/layers/ConvBaseOperator.cpp +++ b/paddle/legacy/gserver/layers/ConvBaseOperator.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvBaseOperator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvBaseOperator.h b/paddle/legacy/gserver/layers/ConvBaseOperator.h similarity index 97% rename from paddle/gserver/layers/ConvBaseOperator.h rename to paddle/legacy/gserver/layers/ConvBaseOperator.h index c3c647cb69..4ac77f2d74 100644 --- a/paddle/gserver/layers/ConvBaseOperator.h +++ b/paddle/legacy/gserver/layers/ConvBaseOperator.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include "Operator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/legacy/gserver/layers/ConvBaseProjection.cpp similarity index 99% rename from paddle/gserver/layers/ConvBaseProjection.cpp rename to paddle/legacy/gserver/layers/ConvBaseProjection.cpp index 39f433b78f..ff5d3412de 100644 --- a/paddle/gserver/layers/ConvBaseProjection.cpp +++ b/paddle/legacy/gserver/layers/ConvBaseProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvBaseProjection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/legacy/gserver/layers/ConvBaseProjection.h similarity index 98% rename from paddle/gserver/layers/ConvBaseProjection.h rename to paddle/legacy/gserver/layers/ConvBaseProjection.h index f3266ae1ab..dcf5ce0f48 100644 --- a/paddle/gserver/layers/ConvBaseProjection.h +++ b/paddle/legacy/gserver/layers/ConvBaseProjection.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Projection.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvOperator.cpp b/paddle/legacy/gserver/layers/ConvOperator.cpp similarity index 98% rename from paddle/gserver/layers/ConvOperator.cpp rename to paddle/legacy/gserver/layers/ConvOperator.cpp index 45498b92d3..5276b2c392 100644 --- a/paddle/gserver/layers/ConvOperator.cpp +++ b/paddle/legacy/gserver/layers/ConvOperator.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvOperator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvOperator.h b/paddle/legacy/gserver/layers/ConvOperator.h similarity index 94% rename from paddle/gserver/layers/ConvOperator.h rename to paddle/legacy/gserver/layers/ConvOperator.h index 527dbf8c27..8f31620111 100644 --- a/paddle/gserver/layers/ConvOperator.h +++ b/paddle/legacy/gserver/layers/ConvOperator.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include "ConvBaseOperator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/legacy/gserver/layers/ConvProjection.cpp similarity index 99% rename from paddle/gserver/layers/ConvProjection.cpp rename to paddle/legacy/gserver/layers/ConvProjection.cpp index f382e6cab1..b40cdac258 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/legacy/gserver/layers/ConvProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvProjection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvProjection.h b/paddle/legacy/gserver/layers/ConvProjection.h similarity index 96% rename from paddle/gserver/layers/ConvProjection.h rename to paddle/legacy/gserver/layers/ConvProjection.h index 22a2202bb6..890a17e2f8 100644 --- a/paddle/gserver/layers/ConvProjection.h +++ b/paddle/legacy/gserver/layers/ConvProjection.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "ConvBaseProjection.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvShiftLayer.cpp b/paddle/legacy/gserver/layers/ConvShiftLayer.cpp similarity index 96% rename from paddle/gserver/layers/ConvShiftLayer.cpp rename to paddle/legacy/gserver/layers/ConvShiftLayer.cpp index 615c347806..b7ecbe556c 100644 --- a/paddle/gserver/layers/ConvShiftLayer.cpp +++ b/paddle/legacy/gserver/layers/ConvShiftLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvTransOperator.cpp b/paddle/legacy/gserver/layers/ConvTransOperator.cpp similarity index 98% rename from paddle/gserver/layers/ConvTransOperator.cpp rename to paddle/legacy/gserver/layers/ConvTransOperator.cpp index ac41d6f9a4..f4ce2affb1 100644 --- a/paddle/gserver/layers/ConvTransOperator.cpp +++ b/paddle/legacy/gserver/layers/ConvTransOperator.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvTransOperator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvTransOperator.h b/paddle/legacy/gserver/layers/ConvTransOperator.h similarity index 94% rename from paddle/gserver/layers/ConvTransOperator.h rename to paddle/legacy/gserver/layers/ConvTransOperator.h index 53cb7a21b4..206335a01f 100644 --- a/paddle/gserver/layers/ConvTransOperator.h +++ b/paddle/legacy/gserver/layers/ConvTransOperator.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include "ConvBaseOperator.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvTransProjection.cpp b/paddle/legacy/gserver/layers/ConvTransProjection.cpp similarity index 99% rename from paddle/gserver/layers/ConvTransProjection.cpp rename to paddle/legacy/gserver/layers/ConvTransProjection.cpp index 242ce34a60..00e34c8f2d 100644 --- a/paddle/gserver/layers/ConvTransProjection.cpp +++ b/paddle/legacy/gserver/layers/ConvTransProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ConvTransProjection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvTransProjection.h b/paddle/legacy/gserver/layers/ConvTransProjection.h similarity index 96% rename from paddle/gserver/layers/ConvTransProjection.h rename to paddle/legacy/gserver/layers/ConvTransProjection.h index 0f9ed720d3..9b63dd4735 100644 --- a/paddle/gserver/layers/ConvTransProjection.h +++ b/paddle/legacy/gserver/layers/ConvTransProjection.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "ConvBaseProjection.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" namespace paddle { diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp similarity index 97% rename from paddle/gserver/layers/ConvexCombinationLayer.cpp rename to paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp index 31363d97c4..c38ab251f1 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/legacy/gserver/layers/ConvexCombinationLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/CosSimLayer.cpp b/paddle/legacy/gserver/layers/CosSimLayer.cpp similarity index 97% rename from paddle/gserver/layers/CosSimLayer.cpp rename to paddle/legacy/gserver/layers/CosSimLayer.cpp index 4e44a5e8df..ab8d7cc1f6 100644 --- a/paddle/gserver/layers/CosSimLayer.cpp +++ b/paddle/legacy/gserver/layers/CosSimLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CosSimLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/CosSimLayer.h b/paddle/legacy/gserver/layers/CosSimLayer.h similarity index 94% rename from paddle/gserver/layers/CosSimLayer.h rename to paddle/legacy/gserver/layers/CosSimLayer.h index d9fe1ff270..b08e2c6a35 100644 --- a/paddle/gserver/layers/CosSimLayer.h +++ b/paddle/legacy/gserver/layers/CosSimLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { /** diff --git a/paddle/gserver/layers/CosSimVecMatLayer.cpp b/paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp similarity index 97% rename from paddle/gserver/layers/CosSimVecMatLayer.cpp rename to paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp index 230ecc768b..03de0be815 100644 --- a/paddle/gserver/layers/CosSimVecMatLayer.cpp +++ b/paddle/legacy/gserver/layers/CosSimVecMatLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { /** diff --git a/paddle/gserver/layers/CostLayer.cpp b/paddle/legacy/gserver/layers/CostLayer.cpp similarity index 99% rename from paddle/gserver/layers/CostLayer.cpp rename to paddle/legacy/gserver/layers/CostLayer.cpp index 1327616950..18b5b77bde 100644 --- a/paddle/gserver/layers/CostLayer.cpp +++ b/paddle/legacy/gserver/layers/CostLayer.cpp @@ -16,9 +16,9 @@ limitations under the License. */ #include #include #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { diff --git a/paddle/gserver/layers/CostLayer.h b/paddle/legacy/gserver/layers/CostLayer.h similarity index 100% rename from paddle/gserver/layers/CostLayer.h rename to paddle/legacy/gserver/layers/CostLayer.h diff --git a/paddle/gserver/layers/CropLayer.cpp b/paddle/legacy/gserver/layers/CropLayer.cpp similarity index 99% rename from paddle/gserver/layers/CropLayer.cpp rename to paddle/legacy/gserver/layers/CropLayer.cpp index bc97ca2f9e..d891375ecc 100644 --- a/paddle/gserver/layers/CropLayer.cpp +++ b/paddle/legacy/gserver/layers/CropLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CropLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { REGISTER_LAYER(crop, CropLayer); diff --git a/paddle/gserver/layers/CropLayer.h b/paddle/legacy/gserver/layers/CropLayer.h similarity index 100% rename from paddle/gserver/layers/CropLayer.h rename to paddle/legacy/gserver/layers/CropLayer.h diff --git a/paddle/gserver/layers/CrossChannelNormLayer.cpp b/paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp similarity index 98% rename from paddle/gserver/layers/CrossChannelNormLayer.cpp rename to paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp index 644450291e..0fe100a96c 100644 --- a/paddle/gserver/layers/CrossChannelNormLayer.cpp +++ b/paddle/legacy/gserver/layers/CrossChannelNormLayer.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "Layer.h" #include "NormLayer.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.cpp b/paddle/legacy/gserver/layers/CrossEntropyOverBeam.cpp similarity index 100% rename from paddle/gserver/layers/CrossEntropyOverBeam.cpp rename to paddle/legacy/gserver/layers/CrossEntropyOverBeam.cpp diff --git a/paddle/gserver/layers/CrossEntropyOverBeam.h b/paddle/legacy/gserver/layers/CrossEntropyOverBeam.h similarity index 100% rename from paddle/gserver/layers/CrossEntropyOverBeam.h rename to paddle/legacy/gserver/layers/CrossEntropyOverBeam.h diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp similarity index 98% rename from paddle/gserver/layers/CudnnBatchNormLayer.cpp rename to paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp index 9a29e6a55e..051155e0d2 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "CudnnBatchNormLayer.h" #include "Layer.h" -#include "paddle/cuda/include/hl_batch_norm.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/cuda/include/hl_batch_norm.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.h similarity index 98% rename from paddle/gserver/layers/CudnnBatchNormLayer.h rename to paddle/legacy/gserver/layers/CudnnBatchNormLayer.h index 1bb4eff8d2..3b33b983b3 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/legacy/gserver/layers/CudnnBatchNormLayer.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "BatchNormBaseLayer.h" #include "Layer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.cpp b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp similarity index 98% rename from paddle/gserver/layers/CudnnConvBaseLayer.cpp rename to paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp index 6d0a40a607..9353cca9c8 100644 --- a/paddle/gserver/layers/CudnnConvBaseLayer.cpp +++ b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CudnnConvBaseLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { REGISTER_LAYER(cudnn_conv, CudnnConvBaseLayer); diff --git a/paddle/gserver/layers/CudnnConvBaseLayer.h b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.h similarity index 97% rename from paddle/gserver/layers/CudnnConvBaseLayer.h rename to paddle/legacy/gserver/layers/CudnnConvBaseLayer.h index 1ee1aa100d..d050183eb7 100644 --- a/paddle/gserver/layers/CudnnConvBaseLayer.h +++ b/paddle/legacy/gserver/layers/CudnnConvBaseLayer.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "ConvBaseLayer.h" #include "Projection.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/CudnnPoolLayer.cpp b/paddle/legacy/gserver/layers/CudnnPoolLayer.cpp similarity index 97% rename from paddle/gserver/layers/CudnnPoolLayer.cpp rename to paddle/legacy/gserver/layers/CudnnPoolLayer.cpp index ac6d2168f4..c790dfd71e 100644 --- a/paddle/gserver/layers/CudnnPoolLayer.cpp +++ b/paddle/legacy/gserver/layers/CudnnPoolLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "CudnnPoolLayer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/CudnnPoolLayer.h b/paddle/legacy/gserver/layers/CudnnPoolLayer.h similarity index 100% rename from paddle/gserver/layers/CudnnPoolLayer.h rename to paddle/legacy/gserver/layers/CudnnPoolLayer.h diff --git a/paddle/gserver/layers/DataLayer.cpp b/paddle/legacy/gserver/layers/DataLayer.cpp similarity index 100% rename from paddle/gserver/layers/DataLayer.cpp rename to paddle/legacy/gserver/layers/DataLayer.cpp diff --git a/paddle/gserver/layers/DataLayer.h b/paddle/legacy/gserver/layers/DataLayer.h similarity index 100% rename from paddle/gserver/layers/DataLayer.h rename to paddle/legacy/gserver/layers/DataLayer.h diff --git a/paddle/gserver/layers/DataNormLayer.cpp b/paddle/legacy/gserver/layers/DataNormLayer.cpp similarity index 98% rename from paddle/gserver/layers/DataNormLayer.cpp rename to paddle/legacy/gserver/layers/DataNormLayer.cpp index 86da4d6f95..6820dfa4d4 100644 --- a/paddle/gserver/layers/DataNormLayer.cpp +++ b/paddle/legacy/gserver/layers/DataNormLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "DataNormLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/DataNormLayer.h b/paddle/legacy/gserver/layers/DataNormLayer.h similarity index 95% rename from paddle/gserver/layers/DataNormLayer.h rename to paddle/legacy/gserver/layers/DataNormLayer.h index 7ae67a877b..7bb8e92824 100644 --- a/paddle/gserver/layers/DataNormLayer.h +++ b/paddle/legacy/gserver/layers/DataNormLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/DeConv3DLayer.cpp b/paddle/legacy/gserver/layers/DeConv3DLayer.cpp similarity index 99% rename from paddle/gserver/layers/DeConv3DLayer.cpp rename to paddle/legacy/gserver/layers/DeConv3DLayer.cpp index db6d6e073c..2cd635564c 100644 --- a/paddle/gserver/layers/DeConv3DLayer.cpp +++ b/paddle/legacy/gserver/layers/DeConv3DLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "DeConv3DLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/DeConv3DLayer.h b/paddle/legacy/gserver/layers/DeConv3DLayer.h similarity index 94% rename from paddle/gserver/layers/DeConv3DLayer.h rename to paddle/legacy/gserver/layers/DeConv3DLayer.h index 13d1d07cf5..9931bccb12 100644 --- a/paddle/gserver/layers/DeConv3DLayer.h +++ b/paddle/legacy/gserver/layers/DeConv3DLayer.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include "ConvBaseLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/DetectionOutputLayer.cpp b/paddle/legacy/gserver/layers/DetectionOutputLayer.cpp similarity index 100% rename from paddle/gserver/layers/DetectionOutputLayer.cpp rename to paddle/legacy/gserver/layers/DetectionOutputLayer.cpp diff --git a/paddle/gserver/layers/DetectionOutputLayer.h b/paddle/legacy/gserver/layers/DetectionOutputLayer.h similarity index 100% rename from paddle/gserver/layers/DetectionOutputLayer.h rename to paddle/legacy/gserver/layers/DetectionOutputLayer.h diff --git a/paddle/gserver/layers/DetectionUtil.cpp b/paddle/legacy/gserver/layers/DetectionUtil.cpp similarity index 100% rename from paddle/gserver/layers/DetectionUtil.cpp rename to paddle/legacy/gserver/layers/DetectionUtil.cpp diff --git a/paddle/gserver/layers/DetectionUtil.h b/paddle/legacy/gserver/layers/DetectionUtil.h similarity index 99% rename from paddle/gserver/layers/DetectionUtil.h rename to paddle/legacy/gserver/layers/DetectionUtil.h index d6502fcf8f..c1e0bb809a 100644 --- a/paddle/gserver/layers/DetectionUtil.h +++ b/paddle/legacy/gserver/layers/DetectionUtil.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" using std::vector; using std::pair; diff --git a/paddle/gserver/layers/DotMulOperator.cpp b/paddle/legacy/gserver/layers/DotMulOperator.cpp similarity index 100% rename from paddle/gserver/layers/DotMulOperator.cpp rename to paddle/legacy/gserver/layers/DotMulOperator.cpp diff --git a/paddle/gserver/layers/DotMulProjection.cpp b/paddle/legacy/gserver/layers/DotMulProjection.cpp similarity index 100% rename from paddle/gserver/layers/DotMulProjection.cpp rename to paddle/legacy/gserver/layers/DotMulProjection.cpp diff --git a/paddle/gserver/layers/DotProdLayer.cpp b/paddle/legacy/gserver/layers/DotProdLayer.cpp similarity index 95% rename from paddle/gserver/layers/DotProdLayer.cpp rename to paddle/legacy/gserver/layers/DotProdLayer.cpp index 72b0c707b2..06060d93f7 100644 --- a/paddle/gserver/layers/DotProdLayer.cpp +++ b/paddle/legacy/gserver/layers/DotProdLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/EosIdCheckLayer.cpp b/paddle/legacy/gserver/layers/EosIdCheckLayer.cpp similarity index 97% rename from paddle/gserver/layers/EosIdCheckLayer.cpp rename to paddle/legacy/gserver/layers/EosIdCheckLayer.cpp index 04400f2836..38671126c6 100644 --- a/paddle/gserver/layers/EosIdCheckLayer.cpp +++ b/paddle/legacy/gserver/layers/EosIdCheckLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { /** diff --git a/paddle/gserver/layers/ExpandConvLayer.cpp b/paddle/legacy/gserver/layers/ExpandConvLayer.cpp similarity index 99% rename from paddle/gserver/layers/ExpandConvLayer.cpp rename to paddle/legacy/gserver/layers/ExpandConvLayer.cpp index 3a84786582..8a53db3806 100644 --- a/paddle/gserver/layers/ExpandConvLayer.cpp +++ b/paddle/legacy/gserver/layers/ExpandConvLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ExpandConvLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" DEFINE_bool(use_nnpack, false, diff --git a/paddle/gserver/layers/ExpandConvLayer.h b/paddle/legacy/gserver/layers/ExpandConvLayer.h similarity index 97% rename from paddle/gserver/layers/ExpandConvLayer.h rename to paddle/legacy/gserver/layers/ExpandConvLayer.h index 6919ef7135..c0eff3ab06 100644 --- a/paddle/gserver/layers/ExpandConvLayer.h +++ b/paddle/legacy/gserver/layers/ExpandConvLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "ConvBaseLayer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/ExpandLayer.cpp b/paddle/legacy/gserver/layers/ExpandLayer.cpp similarity index 98% rename from paddle/gserver/layers/ExpandLayer.cpp rename to paddle/legacy/gserver/layers/ExpandLayer.cpp index 6b57767540..074fbab8ef 100644 --- a/paddle/gserver/layers/ExpandLayer.cpp +++ b/paddle/legacy/gserver/layers/ExpandLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ExpandLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ExpandLayer.h b/paddle/legacy/gserver/layers/ExpandLayer.h similarity index 98% rename from paddle/gserver/layers/ExpandLayer.h rename to paddle/legacy/gserver/layers/ExpandLayer.h index 06bd4ef05e..75a1ec7568 100644 --- a/paddle/gserver/layers/ExpandLayer.h +++ b/paddle/legacy/gserver/layers/ExpandLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/FactorizationMachineLayer.cpp b/paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp similarity index 97% rename from paddle/gserver/layers/FactorizationMachineLayer.cpp rename to paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp index 1744faada2..6cf269fa3f 100644 --- a/paddle/gserver/layers/FactorizationMachineLayer.cpp +++ b/paddle/legacy/gserver/layers/FactorizationMachineLayer.cpp @@ -15,9 +15,9 @@ limitations under the License. */ #include "FactorizationMachineLayer.h" #include #include -#include "paddle/math/SparseMatrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/FactorizationMachineLayer.h b/paddle/legacy/gserver/layers/FactorizationMachineLayer.h similarity index 96% rename from paddle/gserver/layers/FactorizationMachineLayer.h rename to paddle/legacy/gserver/layers/FactorizationMachineLayer.h index 148abe2381..fc015ed727 100644 --- a/paddle/gserver/layers/FactorizationMachineLayer.h +++ b/paddle/legacy/gserver/layers/FactorizationMachineLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { /** diff --git a/paddle/gserver/layers/FeatureMapExpandLayer.cpp b/paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp similarity index 98% rename from paddle/gserver/layers/FeatureMapExpandLayer.cpp rename to paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp index d95f0b9b3d..a3fe1433e4 100644 --- a/paddle/gserver/layers/FeatureMapExpandLayer.cpp +++ b/paddle/legacy/gserver/layers/FeatureMapExpandLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/FullMatrixProjection.cpp b/paddle/legacy/gserver/layers/FullMatrixProjection.cpp similarity index 100% rename from paddle/gserver/layers/FullMatrixProjection.cpp rename to paddle/legacy/gserver/layers/FullMatrixProjection.cpp diff --git a/paddle/gserver/layers/FullMatrixProjection.h b/paddle/legacy/gserver/layers/FullMatrixProjection.h similarity index 96% rename from paddle/gserver/layers/FullMatrixProjection.h rename to paddle/legacy/gserver/layers/FullMatrixProjection.h index a27aa4a123..c33d02a3ae 100644 --- a/paddle/gserver/layers/FullMatrixProjection.h +++ b/paddle/legacy/gserver/layers/FullMatrixProjection.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" #include "Projection.h" diff --git a/paddle/gserver/layers/FullyConnectedLayer.cpp b/paddle/legacy/gserver/layers/FullyConnectedLayer.cpp similarity index 97% rename from paddle/gserver/layers/FullyConnectedLayer.cpp rename to paddle/legacy/gserver/layers/FullyConnectedLayer.cpp index 21ffa01d95..07f4dfbe39 100644 --- a/paddle/gserver/layers/FullyConnectedLayer.cpp +++ b/paddle/legacy/gserver/layers/FullyConnectedLayer.cpp @@ -15,9 +15,9 @@ limitations under the License. */ #include "FullyConnectedLayer.h" #include #include -#include "paddle/math/SparseMatrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/FullyConnectedLayer.h b/paddle/legacy/gserver/layers/FullyConnectedLayer.h similarity index 94% rename from paddle/gserver/layers/FullyConnectedLayer.h rename to paddle/legacy/gserver/layers/FullyConnectedLayer.h index e0f9d6ce55..7e29cac043 100644 --- a/paddle/gserver/layers/FullyConnectedLayer.h +++ b/paddle/legacy/gserver/layers/FullyConnectedLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { /** diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp similarity index 99% rename from paddle/gserver/layers/GatedRecurrentLayer.cpp rename to paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp index 9d38849fdf..bdcd445cb4 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.cpp +++ b/paddle/legacy/gserver/layers/GatedRecurrentLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "GatedRecurrentLayer.h" #include "Layer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/GatedRecurrentLayer.h b/paddle/legacy/gserver/layers/GatedRecurrentLayer.h similarity index 98% rename from paddle/gserver/layers/GatedRecurrentLayer.h rename to paddle/legacy/gserver/layers/GatedRecurrentLayer.h index 46508dc977..8bbf01ce20 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.h +++ b/paddle/legacy/gserver/layers/GatedRecurrentLayer.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "GruCompute.h" #include "Layer.h" #include "SequenceToBatch.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/GetOutputLayer.cpp b/paddle/legacy/gserver/layers/GetOutputLayer.cpp similarity index 100% rename from paddle/gserver/layers/GetOutputLayer.cpp rename to paddle/legacy/gserver/layers/GetOutputLayer.cpp diff --git a/paddle/gserver/layers/GruCompute.cpp b/paddle/legacy/gserver/layers/GruCompute.cpp similarity index 95% rename from paddle/gserver/layers/GruCompute.cpp rename to paddle/legacy/gserver/layers/GruCompute.cpp index 48ddbc413e..adad6285b7 100644 --- a/paddle/gserver/layers/GruCompute.cpp +++ b/paddle/legacy/gserver/layers/GruCompute.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "GruCompute.h" #include "hl_recurrent_apply.cuh" -#include "paddle/function/GruFunctor.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/function/GruFunctor.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/layers/GruCompute.cu b/paddle/legacy/gserver/layers/GruCompute.cu similarity index 100% rename from paddle/gserver/layers/GruCompute.cu rename to paddle/legacy/gserver/layers/GruCompute.cu diff --git a/paddle/gserver/layers/GruCompute.h b/paddle/legacy/gserver/layers/GruCompute.h similarity index 96% rename from paddle/gserver/layers/GruCompute.h rename to paddle/legacy/gserver/layers/GruCompute.h index 50006325ce..6feea7aca8 100644 --- a/paddle/gserver/layers/GruCompute.h +++ b/paddle/legacy/gserver/layers/GruCompute.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "ModelConfig.pb.h" #include "hl_gpu.h" -#include "paddle/utils/Common.h" +#include "paddle/legacy/utils/Common.h" namespace paddle { diff --git a/paddle/gserver/layers/GruStepLayer.cpp b/paddle/legacy/gserver/layers/GruStepLayer.cpp similarity index 99% rename from paddle/gserver/layers/GruStepLayer.cpp rename to paddle/legacy/gserver/layers/GruStepLayer.cpp index 114f287411..2480e42d68 100644 --- a/paddle/gserver/layers/GruStepLayer.cpp +++ b/paddle/legacy/gserver/layers/GruStepLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "GruCompute.h" #include "Layer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp similarity index 99% rename from paddle/gserver/layers/HierarchicalSigmoidLayer.cpp rename to paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp index 3e720f179e..3449599409 100644 --- a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp +++ b/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "HierarchicalSigmoidLayer.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.h similarity index 100% rename from paddle/gserver/layers/HierarchicalSigmoidLayer.h rename to paddle/legacy/gserver/layers/HierarchicalSigmoidLayer.h diff --git a/paddle/gserver/layers/IdentityProjection.cpp b/paddle/legacy/gserver/layers/IdentityProjection.cpp similarity index 98% rename from paddle/gserver/layers/IdentityProjection.cpp rename to paddle/legacy/gserver/layers/IdentityProjection.cpp index 34e9eb9016..f707642e09 100644 --- a/paddle/gserver/layers/IdentityProjection.cpp +++ b/paddle/legacy/gserver/layers/IdentityProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Projection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/InterpolationLayer.cpp b/paddle/legacy/gserver/layers/InterpolationLayer.cpp similarity index 96% rename from paddle/gserver/layers/InterpolationLayer.cpp rename to paddle/legacy/gserver/layers/InterpolationLayer.cpp index 509c07cf22..ed2294e8a3 100644 --- a/paddle/gserver/layers/InterpolationLayer.cpp +++ b/paddle/legacy/gserver/layers/InterpolationLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/KmaxSeqScoreLayer.cpp b/paddle/legacy/gserver/layers/KmaxSeqScoreLayer.cpp similarity index 100% rename from paddle/gserver/layers/KmaxSeqScoreLayer.cpp rename to paddle/legacy/gserver/layers/KmaxSeqScoreLayer.cpp diff --git a/paddle/gserver/layers/L2DistanceLayer.cpp b/paddle/legacy/gserver/layers/L2DistanceLayer.cpp similarity index 97% rename from paddle/gserver/layers/L2DistanceLayer.cpp rename to paddle/legacy/gserver/layers/L2DistanceLayer.cpp index c8cca3762c..a3e627e570 100644 --- a/paddle/gserver/layers/L2DistanceLayer.cpp +++ b/paddle/legacy/gserver/layers/L2DistanceLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "L2DistanceLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/L2DistanceLayer.h b/paddle/legacy/gserver/layers/L2DistanceLayer.h similarity index 97% rename from paddle/gserver/layers/L2DistanceLayer.h rename to paddle/legacy/gserver/layers/L2DistanceLayer.h index 44e688e137..aa8aabd9ca 100644 --- a/paddle/gserver/layers/L2DistanceLayer.h +++ b/paddle/legacy/gserver/layers/L2DistanceLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/legacy/gserver/layers/Layer.cpp similarity index 98% rename from paddle/gserver/layers/Layer.cpp rename to paddle/legacy/gserver/layers/Layer.cpp index 32e2f4c9dd..890d33552d 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/legacy/gserver/layers/Layer.cpp @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include "CostLayer.h" -#include "paddle/math/SparseMatrix.h" -#include "paddle/utils/Error.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/Error.h" +#include "paddle/legacy/utils/Logging.h" #ifndef PADDLE_MOBILE_INFERENCE #include "ValidationLayer.h" diff --git a/paddle/gserver/layers/Layer.h b/paddle/legacy/gserver/layers/Layer.h similarity index 97% rename from paddle/gserver/layers/Layer.h rename to paddle/legacy/gserver/layers/Layer.h index 13e20e8316..a7ff76dece 100644 --- a/paddle/gserver/layers/Layer.h +++ b/paddle/legacy/gserver/layers/Layer.h @@ -17,14 +17,14 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/function/Function.h" -#include "paddle/gserver/activations/ActivationFunction.h" -#include "paddle/math/CpuSparseMatrix.h" -#include "paddle/parameter/Argument.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/Weight.h" -#include "paddle/utils/ClassRegistrar.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/function/Function.h" +#include "paddle/legacy/gserver/activations/ActivationFunction.h" +#include "paddle/legacy/math/CpuSparseMatrix.h" +#include "paddle/legacy/parameter/Argument.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/Weight.h" +#include "paddle/legacy/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/Util.h" /// Macro for registering a layer type. /// Example: REGISTER_LAYER(crf_error, CRFDecodingErrorLayer); diff --git a/paddle/gserver/layers/LinearChainCRF.cpp b/paddle/legacy/gserver/layers/LinearChainCRF.cpp similarity index 100% rename from paddle/gserver/layers/LinearChainCRF.cpp rename to paddle/legacy/gserver/layers/LinearChainCRF.cpp diff --git a/paddle/gserver/layers/LinearChainCRF.h b/paddle/legacy/gserver/layers/LinearChainCRF.h similarity index 98% rename from paddle/gserver/layers/LinearChainCRF.h rename to paddle/legacy/gserver/layers/LinearChainCRF.h index e802b701d0..65e2390543 100644 --- a/paddle/gserver/layers/LinearChainCRF.h +++ b/paddle/legacy/gserver/layers/LinearChainCRF.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/LinearChainCTC.cpp b/paddle/legacy/gserver/layers/LinearChainCTC.cpp similarity index 100% rename from paddle/gserver/layers/LinearChainCTC.cpp rename to paddle/legacy/gserver/layers/LinearChainCTC.cpp diff --git a/paddle/gserver/layers/LinearChainCTC.h b/paddle/legacy/gserver/layers/LinearChainCTC.h similarity index 97% rename from paddle/gserver/layers/LinearChainCTC.h rename to paddle/legacy/gserver/layers/LinearChainCTC.h index 5b325a0deb..e6c4c7bfe0 100644 --- a/paddle/gserver/layers/LinearChainCTC.h +++ b/paddle/legacy/gserver/layers/LinearChainCTC.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/LstmCompute.cpp b/paddle/legacy/gserver/layers/LstmCompute.cpp similarity index 98% rename from paddle/gserver/layers/LstmCompute.cpp rename to paddle/legacy/gserver/layers/LstmCompute.cpp index ea30f6d6b1..70f08e1d4e 100644 --- a/paddle/gserver/layers/LstmCompute.cpp +++ b/paddle/legacy/gserver/layers/LstmCompute.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "LstmCompute.h" #include "hl_recurrent_apply.cuh" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/gserver/layers/LstmCompute.cu b/paddle/legacy/gserver/layers/LstmCompute.cu similarity index 100% rename from paddle/gserver/layers/LstmCompute.cu rename to paddle/legacy/gserver/layers/LstmCompute.cu diff --git a/paddle/gserver/layers/LstmCompute.h b/paddle/legacy/gserver/layers/LstmCompute.h similarity index 98% rename from paddle/gserver/layers/LstmCompute.h rename to paddle/legacy/gserver/layers/LstmCompute.h index 80fb01cd18..ac40c35ef1 100644 --- a/paddle/gserver/layers/LstmCompute.h +++ b/paddle/legacy/gserver/layers/LstmCompute.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "ModelConfig.pb.h" #include "hl_gpu.h" -#include "paddle/utils/Common.h" +#include "paddle/legacy/utils/Common.h" namespace paddle { diff --git a/paddle/gserver/layers/LstmLayer.cpp b/paddle/legacy/gserver/layers/LstmLayer.cpp similarity index 99% rename from paddle/gserver/layers/LstmLayer.cpp rename to paddle/legacy/gserver/layers/LstmLayer.cpp index f65ae6a3e6..43a55d8d49 100644 --- a/paddle/gserver/layers/LstmLayer.cpp +++ b/paddle/legacy/gserver/layers/LstmLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "LstmLayer.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Stat.h" DECLARE_bool(prev_batch_state); diff --git a/paddle/gserver/layers/LstmLayer.h b/paddle/legacy/gserver/layers/LstmLayer.h similarity index 98% rename from paddle/gserver/layers/LstmLayer.h rename to paddle/legacy/gserver/layers/LstmLayer.h index 76dfe8146b..8c8b382f50 100644 --- a/paddle/gserver/layers/LstmLayer.h +++ b/paddle/legacy/gserver/layers/LstmLayer.h @@ -17,8 +17,8 @@ limitations under the License. */ #include "Layer.h" #include "LstmCompute.h" #include "SequenceToBatch.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/LstmStepLayer.cpp b/paddle/legacy/gserver/layers/LstmStepLayer.cpp similarity index 99% rename from paddle/gserver/layers/LstmStepLayer.cpp rename to paddle/legacy/gserver/layers/LstmStepLayer.cpp index c44768ddb2..f02f8ad62f 100644 --- a/paddle/gserver/layers/LstmStepLayer.cpp +++ b/paddle/legacy/gserver/layers/LstmStepLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "Layer.h" #include "LstmCompute.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/legacy/gserver/layers/MDLstmLayer.cpp similarity index 99% rename from paddle/gserver/layers/MDLstmLayer.cpp rename to paddle/legacy/gserver/layers/MDLstmLayer.cpp index 22c28157c5..4838183e8c 100644 --- a/paddle/gserver/layers/MDLstmLayer.cpp +++ b/paddle/legacy/gserver/layers/MDLstmLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "LstmLayer.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.cpp similarity index 100% rename from paddle/gserver/layers/MKLDNNAddtoLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNAddtoLayer.cpp diff --git a/paddle/gserver/layers/MKLDNNAddtoLayer.h b/paddle/legacy/gserver/layers/MKLDNNAddtoLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNAddtoLayer.h rename to paddle/legacy/gserver/layers/MKLDNNAddtoLayer.h diff --git a/paddle/gserver/layers/MKLDNNBase.h b/paddle/legacy/gserver/layers/MKLDNNBase.h similarity index 100% rename from paddle/gserver/layers/MKLDNNBase.h rename to paddle/legacy/gserver/layers/MKLDNNBase.h diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.cpp similarity index 100% rename from paddle/gserver/layers/MKLDNNBatchNormLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.cpp diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNBatchNormLayer.h rename to paddle/legacy/gserver/layers/MKLDNNBatchNormLayer.h diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNConcatLayer.cpp similarity index 100% rename from paddle/gserver/layers/MKLDNNConcatLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNConcatLayer.cpp diff --git a/paddle/gserver/layers/MKLDNNConcatLayer.h b/paddle/legacy/gserver/layers/MKLDNNConcatLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNConcatLayer.h rename to paddle/legacy/gserver/layers/MKLDNNConcatLayer.h diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp similarity index 99% rename from paddle/gserver/layers/MKLDNNConvLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp index a442a0a013..b47bf14821 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/legacy/gserver/layers/MKLDNNConvLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MKLDNNConvLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/Logging.h" using namespace mkldnn; // NOLINT typedef memory::format format; diff --git a/paddle/gserver/layers/MKLDNNConvLayer.h b/paddle/legacy/gserver/layers/MKLDNNConvLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNConvLayer.h rename to paddle/legacy/gserver/layers/MKLDNNConvLayer.h diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp similarity index 99% rename from paddle/gserver/layers/MKLDNNFcLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp index 0c7e6f16e2..f3747c7db8 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/legacy/gserver/layers/MKLDNNFcLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MKLDNNFcLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" using namespace mkldnn; // NOLINT typedef memory::format format; diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/legacy/gserver/layers/MKLDNNFcLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNFcLayer.h rename to paddle/legacy/gserver/layers/MKLDNNFcLayer.h diff --git a/paddle/gserver/layers/MKLDNNLRNLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp similarity index 99% rename from paddle/gserver/layers/MKLDNNLRNLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp index 88513ab8bc..739482348f 100644 --- a/paddle/gserver/layers/MKLDNNLRNLayer.cpp +++ b/paddle/legacy/gserver/layers/MKLDNNLRNLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MKLDNNLRNLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" using namespace mkldnn; // NOLINT typedef memory::format format; diff --git a/paddle/gserver/layers/MKLDNNLRNLayer.h b/paddle/legacy/gserver/layers/MKLDNNLRNLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNLRNLayer.h rename to paddle/legacy/gserver/layers/MKLDNNLRNLayer.h diff --git a/paddle/gserver/layers/MKLDNNLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNLayer.cpp similarity index 100% rename from paddle/gserver/layers/MKLDNNLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNLayer.cpp diff --git a/paddle/gserver/layers/MKLDNNLayer.h b/paddle/legacy/gserver/layers/MKLDNNLayer.h similarity index 99% rename from paddle/gserver/layers/MKLDNNLayer.h rename to paddle/legacy/gserver/layers/MKLDNNLayer.h index 2b164d0d3b..94dc8625f6 100644 --- a/paddle/gserver/layers/MKLDNNLayer.h +++ b/paddle/legacy/gserver/layers/MKLDNNLayer.h @@ -18,8 +18,8 @@ limitations under the License. */ #include "Layer.h" #include "MKLDNNBase.h" #include "mkldnn.hpp" -#include "paddle/math/MKLDNNMatrix.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/MKLDNNMatrix.h" +#include "paddle/legacy/utils/Stat.h" DECLARE_bool(use_mkldnn); diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.cpp b/paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp similarity index 98% rename from paddle/gserver/layers/MKLDNNPoolLayer.cpp rename to paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp index 3be848c749..83d980538d 100644 --- a/paddle/gserver/layers/MKLDNNPoolLayer.cpp +++ b/paddle/legacy/gserver/layers/MKLDNNPoolLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MKLDNNPoolLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/Logging.h" using namespace mkldnn; // NOLINT typedef memory::format format; diff --git a/paddle/gserver/layers/MKLDNNPoolLayer.h b/paddle/legacy/gserver/layers/MKLDNNPoolLayer.h similarity index 100% rename from paddle/gserver/layers/MKLDNNPoolLayer.h rename to paddle/legacy/gserver/layers/MKLDNNPoolLayer.h diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.cpp b/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.cpp similarity index 100% rename from paddle/gserver/layers/MKLPackedRecurrentLayer.cpp rename to paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.cpp diff --git a/paddle/gserver/layers/MKLPackedRecurrentLayer.h b/paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h similarity index 100% rename from paddle/gserver/layers/MKLPackedRecurrentLayer.h rename to paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h diff --git a/paddle/gserver/layers/MKLPackedWeight.h b/paddle/legacy/gserver/layers/MKLPackedWeight.h similarity index 94% rename from paddle/gserver/layers/MKLPackedWeight.h rename to paddle/legacy/gserver/layers/MKLPackedWeight.h index b01a961d00..47f225bd03 100644 --- a/paddle/gserver/layers/MKLPackedWeight.h +++ b/paddle/legacy/gserver/layers/MKLPackedWeight.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once -#include "paddle/math/MathFunctions.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/Weight.h" +#include "paddle/legacy/math/MathFunctions.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/Weight.h" namespace paddle { diff --git a/paddle/gserver/layers/MaxIdLayer.cpp b/paddle/legacy/gserver/layers/MaxIdLayer.cpp similarity index 100% rename from paddle/gserver/layers/MaxIdLayer.cpp rename to paddle/legacy/gserver/layers/MaxIdLayer.cpp diff --git a/paddle/gserver/layers/MaxLayer.cpp b/paddle/legacy/gserver/layers/MaxLayer.cpp similarity index 96% rename from paddle/gserver/layers/MaxLayer.cpp rename to paddle/legacy/gserver/layers/MaxLayer.cpp index 7ee2e0dd94..b51251b663 100644 --- a/paddle/gserver/layers/MaxLayer.cpp +++ b/paddle/legacy/gserver/layers/MaxLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MaxLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/MaxLayer.h b/paddle/legacy/gserver/layers/MaxLayer.h similarity index 95% rename from paddle/gserver/layers/MaxLayer.h rename to paddle/legacy/gserver/layers/MaxLayer.h index e46f997c34..12d0128e39 100644 --- a/paddle/gserver/layers/MaxLayer.h +++ b/paddle/legacy/gserver/layers/MaxLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "SequencePoolLayer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/MaxOutLayer.cpp b/paddle/legacy/gserver/layers/MaxOutLayer.cpp similarity index 100% rename from paddle/gserver/layers/MaxOutLayer.cpp rename to paddle/legacy/gserver/layers/MaxOutLayer.cpp diff --git a/paddle/gserver/layers/MaxOutLayer.h b/paddle/legacy/gserver/layers/MaxOutLayer.h similarity index 97% rename from paddle/gserver/layers/MaxOutLayer.h rename to paddle/legacy/gserver/layers/MaxOutLayer.h index 0eb8674b4c..e56f34b8e0 100644 --- a/paddle/gserver/layers/MaxOutLayer.h +++ b/paddle/legacy/gserver/layers/MaxOutLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/MaxPoolWithMaskLayer.cpp b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp similarity index 97% rename from paddle/gserver/layers/MaxPoolWithMaskLayer.cpp rename to paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp index e594e22b5e..a1cc59a719 100644 --- a/paddle/gserver/layers/MaxPoolWithMaskLayer.cpp +++ b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MaxPoolWithMaskLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/MaxPoolWithMaskLayer.h b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h similarity index 96% rename from paddle/gserver/layers/MaxPoolWithMaskLayer.h rename to paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h index c948364f6b..fcd5388abe 100644 --- a/paddle/gserver/layers/MaxPoolWithMaskLayer.h +++ b/paddle/legacy/gserver/layers/MaxPoolWithMaskLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "PoolLayer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/MixedLayer.cpp b/paddle/legacy/gserver/layers/MixedLayer.cpp similarity index 99% rename from paddle/gserver/layers/MixedLayer.cpp rename to paddle/legacy/gserver/layers/MixedLayer.cpp index 7dcb30b98d..63e658c09c 100644 --- a/paddle/gserver/layers/MixedLayer.cpp +++ b/paddle/legacy/gserver/layers/MixedLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MixedLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/MixedLayer.h b/paddle/legacy/gserver/layers/MixedLayer.h similarity index 100% rename from paddle/gserver/layers/MixedLayer.h rename to paddle/legacy/gserver/layers/MixedLayer.h diff --git a/paddle/gserver/layers/MultiBoxLossLayer.cpp b/paddle/legacy/gserver/layers/MultiBoxLossLayer.cpp similarity index 100% rename from paddle/gserver/layers/MultiBoxLossLayer.cpp rename to paddle/legacy/gserver/layers/MultiBoxLossLayer.cpp diff --git a/paddle/gserver/layers/MultiBoxLossLayer.h b/paddle/legacy/gserver/layers/MultiBoxLossLayer.h similarity index 100% rename from paddle/gserver/layers/MultiBoxLossLayer.h rename to paddle/legacy/gserver/layers/MultiBoxLossLayer.h diff --git a/paddle/gserver/layers/MultinomialSampler.cpp b/paddle/legacy/gserver/layers/MultinomialSampler.cpp similarity index 100% rename from paddle/gserver/layers/MultinomialSampler.cpp rename to paddle/legacy/gserver/layers/MultinomialSampler.cpp diff --git a/paddle/gserver/layers/MultinomialSampler.h b/paddle/legacy/gserver/layers/MultinomialSampler.h similarity index 98% rename from paddle/gserver/layers/MultinomialSampler.h rename to paddle/legacy/gserver/layers/MultinomialSampler.h index 8cbb229f15..ed44535241 100644 --- a/paddle/gserver/layers/MultinomialSampler.h +++ b/paddle/legacy/gserver/layers/MultinomialSampler.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include -#include "paddle/utils/Common.h" +#include "paddle/legacy/utils/Common.h" namespace paddle { diff --git a/paddle/gserver/layers/MultiplexLayer.cpp b/paddle/legacy/gserver/layers/MultiplexLayer.cpp similarity index 97% rename from paddle/gserver/layers/MultiplexLayer.cpp rename to paddle/legacy/gserver/layers/MultiplexLayer.cpp index 43ecc48cd9..9ca2b24175 100644 --- a/paddle/gserver/layers/MultiplexLayer.cpp +++ b/paddle/legacy/gserver/layers/MultiplexLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/legacy/gserver/layers/NCELayer.cpp similarity index 99% rename from paddle/gserver/layers/NCELayer.cpp rename to paddle/legacy/gserver/layers/NCELayer.cpp index cc48fe100f..ae4d640816 100644 --- a/paddle/gserver/layers/NCELayer.cpp +++ b/paddle/legacy/gserver/layers/NCELayer.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include "Layer.h" #include "MultinomialSampler.h" -#include "paddle/math/MathFunctions.h" +#include "paddle/legacy/math/MathFunctions.h" namespace paddle { diff --git a/paddle/gserver/layers/NormLayer.cpp b/paddle/legacy/gserver/layers/NormLayer.cpp similarity index 97% rename from paddle/gserver/layers/NormLayer.cpp rename to paddle/legacy/gserver/layers/NormLayer.cpp index 4678f6fa9a..443e26dbc8 100644 --- a/paddle/gserver/layers/NormLayer.cpp +++ b/paddle/legacy/gserver/layers/NormLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "NormLayer.h" #include "NormProjectionLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { REGISTER_LAYER_CREATE_FUNC(norm, &NormLayer::create); diff --git a/paddle/gserver/layers/NormLayer.h b/paddle/legacy/gserver/layers/NormLayer.h similarity index 98% rename from paddle/gserver/layers/NormLayer.h rename to paddle/legacy/gserver/layers/NormLayer.h index 3807584415..5ac00034d0 100644 --- a/paddle/gserver/layers/NormLayer.h +++ b/paddle/legacy/gserver/layers/NormLayer.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "Layer.h" #include "NormLayer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/NormProjectionLayer.cpp b/paddle/legacy/gserver/layers/NormProjectionLayer.cpp similarity index 97% rename from paddle/gserver/layers/NormProjectionLayer.cpp rename to paddle/legacy/gserver/layers/NormProjectionLayer.cpp index 3013bbdbc7..72affaa1ce 100644 --- a/paddle/gserver/layers/NormProjectionLayer.cpp +++ b/paddle/legacy/gserver/layers/NormProjectionLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "NormProjectionLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { size_t CMRProjectionNormLayer::getSize() { diff --git a/paddle/gserver/layers/NormProjectionLayer.h b/paddle/legacy/gserver/layers/NormProjectionLayer.h similarity index 97% rename from paddle/gserver/layers/NormProjectionLayer.h rename to paddle/legacy/gserver/layers/NormProjectionLayer.h index 64803a1603..492d1fcb72 100644 --- a/paddle/gserver/layers/NormProjectionLayer.h +++ b/paddle/legacy/gserver/layers/NormProjectionLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "NormLayer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/Operator.cpp b/paddle/legacy/gserver/layers/Operator.cpp similarity index 100% rename from paddle/gserver/layers/Operator.cpp rename to paddle/legacy/gserver/layers/Operator.cpp diff --git a/paddle/gserver/layers/Operator.h b/paddle/legacy/gserver/layers/Operator.h similarity index 96% rename from paddle/gserver/layers/Operator.h rename to paddle/legacy/gserver/layers/Operator.h index 42d525ef3e..20a248985e 100644 --- a/paddle/gserver/layers/Operator.h +++ b/paddle/legacy/gserver/layers/Operator.h @@ -15,10 +15,10 @@ limitations under the License. */ #pragma once #include "ModelConfig.pb.h" -#include "paddle/parameter/Parameter.h" +#include "paddle/legacy/parameter/Parameter.h" #include "Layer.h" -#include "paddle/parameter/Argument.h" +#include "paddle/legacy/parameter/Argument.h" namespace paddle { diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/legacy/gserver/layers/OuterProdLayer.cpp similarity index 97% rename from paddle/gserver/layers/OuterProdLayer.cpp rename to paddle/legacy/gserver/layers/OuterProdLayer.cpp index 11a910f331..d0928be9d4 100644 --- a/paddle/gserver/layers/OuterProdLayer.cpp +++ b/paddle/legacy/gserver/layers/OuterProdLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/PadLayer.cpp b/paddle/legacy/gserver/layers/PadLayer.cpp similarity index 98% rename from paddle/gserver/layers/PadLayer.cpp rename to paddle/legacy/gserver/layers/PadLayer.cpp index b1910e108b..7b92b3de2d 100644 --- a/paddle/gserver/layers/PadLayer.cpp +++ b/paddle/legacy/gserver/layers/PadLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "PadLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/PadLayer.h b/paddle/legacy/gserver/layers/PadLayer.h similarity index 100% rename from paddle/gserver/layers/PadLayer.h rename to paddle/legacy/gserver/layers/PadLayer.h diff --git a/paddle/gserver/layers/ParameterReluLayer.cpp b/paddle/legacy/gserver/layers/ParameterReluLayer.cpp similarity index 96% rename from paddle/gserver/layers/ParameterReluLayer.cpp rename to paddle/legacy/gserver/layers/ParameterReluLayer.cpp index 12d04fc1c3..23715d1975 100644 --- a/paddle/gserver/layers/ParameterReluLayer.cpp +++ b/paddle/legacy/gserver/layers/ParameterReluLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ParameterReluLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ParameterReluLayer.h b/paddle/legacy/gserver/layers/ParameterReluLayer.h similarity index 95% rename from paddle/gserver/layers/ParameterReluLayer.h rename to paddle/legacy/gserver/layers/ParameterReluLayer.h index 4553413fcd..3aac4b42f6 100644 --- a/paddle/gserver/layers/ParameterReluLayer.h +++ b/paddle/legacy/gserver/layers/ParameterReluLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/Pool3DLayer.cpp b/paddle/legacy/gserver/layers/Pool3DLayer.cpp similarity index 99% rename from paddle/gserver/layers/Pool3DLayer.cpp rename to paddle/legacy/gserver/layers/Pool3DLayer.cpp index 3ac9eb0d81..ae3f55c27f 100644 --- a/paddle/gserver/layers/Pool3DLayer.cpp +++ b/paddle/legacy/gserver/layers/Pool3DLayer.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "Pool3DLayer.h" #include "PoolProjectionLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/gserver/layers/Pool3DLayer.h b/paddle/legacy/gserver/layers/Pool3DLayer.h similarity index 94% rename from paddle/gserver/layers/Pool3DLayer.h rename to paddle/legacy/gserver/layers/Pool3DLayer.h index 32605f8b70..6851c44ab2 100644 --- a/paddle/gserver/layers/Pool3DLayer.h +++ b/paddle/legacy/gserver/layers/Pool3DLayer.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include "Layer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/PoolLayer.cpp b/paddle/legacy/gserver/layers/PoolLayer.cpp similarity index 98% rename from paddle/gserver/layers/PoolLayer.cpp rename to paddle/legacy/gserver/layers/PoolLayer.cpp index ee589e6be5..df172d9575 100644 --- a/paddle/gserver/layers/PoolLayer.cpp +++ b/paddle/legacy/gserver/layers/PoolLayer.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "PoolLayer.h" #include "MaxPoolWithMaskLayer.h" #include "PoolProjectionLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #ifdef PADDLE_WITH_CUDA #include "CudnnPoolLayer.h" #endif diff --git a/paddle/gserver/layers/PoolLayer.h b/paddle/legacy/gserver/layers/PoolLayer.h similarity index 94% rename from paddle/gserver/layers/PoolLayer.h rename to paddle/legacy/gserver/layers/PoolLayer.h index 99f8f148e2..0808dfae84 100644 --- a/paddle/gserver/layers/PoolLayer.h +++ b/paddle/legacy/gserver/layers/PoolLayer.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include "Layer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/gserver/layers/PoolProjection.cpp b/paddle/legacy/gserver/layers/PoolProjection.cpp similarity index 100% rename from paddle/gserver/layers/PoolProjection.cpp rename to paddle/legacy/gserver/layers/PoolProjection.cpp diff --git a/paddle/gserver/layers/PoolProjection.h b/paddle/legacy/gserver/layers/PoolProjection.h similarity index 97% rename from paddle/gserver/layers/PoolProjection.h rename to paddle/legacy/gserver/layers/PoolProjection.h index 8004cc1550..d01b6a13f0 100644 --- a/paddle/gserver/layers/PoolProjection.h +++ b/paddle/legacy/gserver/layers/PoolProjection.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Projection.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" namespace paddle { diff --git a/paddle/gserver/layers/PoolProjectionLayer.cpp b/paddle/legacy/gserver/layers/PoolProjectionLayer.cpp similarity index 96% rename from paddle/gserver/layers/PoolProjectionLayer.cpp rename to paddle/legacy/gserver/layers/PoolProjectionLayer.cpp index 73d320e67e..e44b1d7ba1 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.cpp +++ b/paddle/legacy/gserver/layers/PoolProjectionLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "PoolProjectionLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/PoolProjectionLayer.h b/paddle/legacy/gserver/layers/PoolProjectionLayer.h similarity index 97% rename from paddle/gserver/layers/PoolProjectionLayer.h rename to paddle/legacy/gserver/layers/PoolProjectionLayer.h index 9ad144cc2a..fcd35bbba4 100644 --- a/paddle/gserver/layers/PoolProjectionLayer.h +++ b/paddle/legacy/gserver/layers/PoolProjectionLayer.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "PoolLayer.h" #include "PoolProjection.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/PowerLayer.cpp b/paddle/legacy/gserver/layers/PowerLayer.cpp similarity index 96% rename from paddle/gserver/layers/PowerLayer.cpp rename to paddle/legacy/gserver/layers/PowerLayer.cpp index 7e8d60db8f..5e94c64db6 100644 --- a/paddle/gserver/layers/PowerLayer.cpp +++ b/paddle/legacy/gserver/layers/PowerLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/PrintLayer.cpp b/paddle/legacy/gserver/layers/PrintLayer.cpp similarity index 100% rename from paddle/gserver/layers/PrintLayer.cpp rename to paddle/legacy/gserver/layers/PrintLayer.cpp diff --git a/paddle/gserver/layers/PriorBox.cpp b/paddle/legacy/gserver/layers/PriorBox.cpp similarity index 98% rename from paddle/gserver/layers/PriorBox.cpp rename to paddle/legacy/gserver/layers/PriorBox.cpp index 39d2c2d737..83aab6e366 100644 --- a/paddle/gserver/layers/PriorBox.cpp +++ b/paddle/legacy/gserver/layers/PriorBox.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/Projection.cpp b/paddle/legacy/gserver/layers/Projection.cpp similarity index 100% rename from paddle/gserver/layers/Projection.cpp rename to paddle/legacy/gserver/layers/Projection.cpp diff --git a/paddle/gserver/layers/Projection.h b/paddle/legacy/gserver/layers/Projection.h similarity index 98% rename from paddle/gserver/layers/Projection.h rename to paddle/legacy/gserver/layers/Projection.h index 88a41355cf..974f5a2cac 100644 --- a/paddle/gserver/layers/Projection.h +++ b/paddle/legacy/gserver/layers/Projection.h @@ -16,7 +16,7 @@ limitations under the License. */ #include "Layer.h" #include "ModelConfig.pb.h" -#include "paddle/parameter/Parameter.h" +#include "paddle/legacy/parameter/Parameter.h" namespace paddle { diff --git a/paddle/gserver/layers/ROIPoolLayer.cpp b/paddle/legacy/gserver/layers/ROIPoolLayer.cpp similarity index 100% rename from paddle/gserver/layers/ROIPoolLayer.cpp rename to paddle/legacy/gserver/layers/ROIPoolLayer.cpp diff --git a/paddle/gserver/layers/ROIPoolLayer.h b/paddle/legacy/gserver/layers/ROIPoolLayer.h similarity index 100% rename from paddle/gserver/layers/ROIPoolLayer.h rename to paddle/legacy/gserver/layers/ROIPoolLayer.h diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/legacy/gserver/layers/RecurrentLayer.cpp similarity index 100% rename from paddle/gserver/layers/RecurrentLayer.cpp rename to paddle/legacy/gserver/layers/RecurrentLayer.cpp diff --git a/paddle/gserver/layers/RecurrentLayer.h b/paddle/legacy/gserver/layers/RecurrentLayer.h similarity index 99% rename from paddle/gserver/layers/RecurrentLayer.h rename to paddle/legacy/gserver/layers/RecurrentLayer.h index 94e633e657..287ea27a09 100644 --- a/paddle/gserver/layers/RecurrentLayer.h +++ b/paddle/legacy/gserver/layers/RecurrentLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #include #include "Layer.h" #include "SequenceToBatch.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/RecurrentLayerGroup.cpp b/paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp similarity index 95% rename from paddle/gserver/layers/RecurrentLayerGroup.cpp rename to paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp index 6694e8f299..3932124599 100644 --- a/paddle/gserver/layers/RecurrentLayerGroup.cpp +++ b/paddle/legacy/gserver/layers/RecurrentLayerGroup.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/gserver/layers/Layer.h" +#include "paddle/legacy/gserver/layers/Layer.h" -#include "paddle/gserver/gradientmachines/RecurrentGradientMachine.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/gserver/gradientmachines/RecurrentGradientMachine.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ResizeLayer.cpp b/paddle/legacy/gserver/layers/ResizeLayer.cpp similarity index 96% rename from paddle/gserver/layers/ResizeLayer.cpp rename to paddle/legacy/gserver/layers/ResizeLayer.cpp index d4ae994593..8f8aad820f 100644 --- a/paddle/gserver/layers/ResizeLayer.cpp +++ b/paddle/legacy/gserver/layers/ResizeLayer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/BaseMatrix.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/BaseMatrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/RotateLayer.cpp b/paddle/legacy/gserver/layers/RotateLayer.cpp similarity index 100% rename from paddle/gserver/layers/RotateLayer.cpp rename to paddle/legacy/gserver/layers/RotateLayer.cpp diff --git a/paddle/gserver/layers/RotateLayer.h b/paddle/legacy/gserver/layers/RotateLayer.h similarity index 97% rename from paddle/gserver/layers/RotateLayer.h rename to paddle/legacy/gserver/layers/RotateLayer.h index 7ecbff2016..498e24372b 100644 --- a/paddle/gserver/layers/RotateLayer.h +++ b/paddle/legacy/gserver/layers/RotateLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/RowConvLayer.cpp b/paddle/legacy/gserver/layers/RowConvLayer.cpp similarity index 98% rename from paddle/gserver/layers/RowConvLayer.cpp rename to paddle/legacy/gserver/layers/RowConvLayer.cpp index 63b499e486..1961557dc2 100644 --- a/paddle/gserver/layers/RowConvLayer.cpp +++ b/paddle/legacy/gserver/layers/RowConvLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "RowConvLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/RowConvLayer.h b/paddle/legacy/gserver/layers/RowConvLayer.h similarity index 100% rename from paddle/gserver/layers/RowConvLayer.h rename to paddle/legacy/gserver/layers/RowConvLayer.h diff --git a/paddle/gserver/layers/RowL2NormLayer.cpp b/paddle/legacy/gserver/layers/RowL2NormLayer.cpp similarity index 100% rename from paddle/gserver/layers/RowL2NormLayer.cpp rename to paddle/legacy/gserver/layers/RowL2NormLayer.cpp diff --git a/paddle/gserver/layers/SamplingIdLayer.cpp b/paddle/legacy/gserver/layers/SamplingIdLayer.cpp similarity index 100% rename from paddle/gserver/layers/SamplingIdLayer.cpp rename to paddle/legacy/gserver/layers/SamplingIdLayer.cpp diff --git a/paddle/gserver/layers/ScaleShiftLayer.cpp b/paddle/legacy/gserver/layers/ScaleShiftLayer.cpp similarity index 100% rename from paddle/gserver/layers/ScaleShiftLayer.cpp rename to paddle/legacy/gserver/layers/ScaleShiftLayer.cpp diff --git a/paddle/gserver/layers/ScaleSubRegionLayer.cpp b/paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp similarity index 98% rename from paddle/gserver/layers/ScaleSubRegionLayer.cpp rename to paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp index 68a0ff7358..70d44d2a7e 100644 --- a/paddle/gserver/layers/ScaleSubRegionLayer.cpp +++ b/paddle/legacy/gserver/layers/ScaleSubRegionLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "ScaleSubRegionLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { REGISTER_LAYER(scale_sub_region, ScaleSubRegionLayer); diff --git a/paddle/gserver/layers/ScaleSubRegionLayer.h b/paddle/legacy/gserver/layers/ScaleSubRegionLayer.h similarity index 100% rename from paddle/gserver/layers/ScaleSubRegionLayer.h rename to paddle/legacy/gserver/layers/ScaleSubRegionLayer.h diff --git a/paddle/gserver/layers/ScalingLayer.cpp b/paddle/legacy/gserver/layers/ScalingLayer.cpp similarity index 95% rename from paddle/gserver/layers/ScalingLayer.cpp rename to paddle/legacy/gserver/layers/ScalingLayer.cpp index 15e07daebe..a8286b6614 100644 --- a/paddle/gserver/layers/ScalingLayer.cpp +++ b/paddle/legacy/gserver/layers/ScalingLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ScalingProjection.cpp b/paddle/legacy/gserver/layers/ScalingProjection.cpp similarity index 100% rename from paddle/gserver/layers/ScalingProjection.cpp rename to paddle/legacy/gserver/layers/ScalingProjection.cpp diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp similarity index 99% rename from paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp rename to paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp index 43c98993f3..72fb068148 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp +++ b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.cpp @@ -15,9 +15,9 @@ limitations under the License. */ #include "SelectiveFullyConnectedLayer.h" #include #include -#include "paddle/math/SparseMatrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h similarity index 97% rename from paddle/gserver/layers/SelectiveFullyConnectedLayer.h rename to paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h index 4b32ce8b16..3ba04d9b2a 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.h +++ b/paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/SequenceConcatLayer.cpp b/paddle/legacy/gserver/layers/SequenceConcatLayer.cpp similarity index 98% rename from paddle/gserver/layers/SequenceConcatLayer.cpp rename to paddle/legacy/gserver/layers/SequenceConcatLayer.cpp index c84c3ce4f0..7b598e11ac 100644 --- a/paddle/gserver/layers/SequenceConcatLayer.cpp +++ b/paddle/legacy/gserver/layers/SequenceConcatLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp b/paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp similarity index 96% rename from paddle/gserver/layers/SequenceLastInstanceLayer.cpp rename to paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp index 28d0a9296d..8735d71ba3 100644 --- a/paddle/gserver/layers/SequenceLastInstanceLayer.cpp +++ b/paddle/legacy/gserver/layers/SequenceLastInstanceLayer.cpp @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "SequencePoolLayer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SequencePoolLayer.cpp b/paddle/legacy/gserver/layers/SequencePoolLayer.cpp similarity index 98% rename from paddle/gserver/layers/SequencePoolLayer.cpp rename to paddle/legacy/gserver/layers/SequencePoolLayer.cpp index 650ab425d1..243b795db4 100644 --- a/paddle/gserver/layers/SequencePoolLayer.cpp +++ b/paddle/legacy/gserver/layers/SequencePoolLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "SequencePoolLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/gserver/layers/SequencePoolLayer.h b/paddle/legacy/gserver/layers/SequencePoolLayer.h similarity index 98% rename from paddle/gserver/layers/SequencePoolLayer.h rename to paddle/legacy/gserver/layers/SequencePoolLayer.h index 01183060af..1c019b3130 100644 --- a/paddle/gserver/layers/SequencePoolLayer.h +++ b/paddle/legacy/gserver/layers/SequencePoolLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/SequenceReshapeLayer.cpp b/paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp similarity index 97% rename from paddle/gserver/layers/SequenceReshapeLayer.cpp rename to paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp index 319310af8c..e3d40cab50 100644 --- a/paddle/gserver/layers/SequenceReshapeLayer.cpp +++ b/paddle/legacy/gserver/layers/SequenceReshapeLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SequenceSliceLayer.cpp b/paddle/legacy/gserver/layers/SequenceSliceLayer.cpp similarity index 98% rename from paddle/gserver/layers/SequenceSliceLayer.cpp rename to paddle/legacy/gserver/layers/SequenceSliceLayer.cpp index a6d810b583..3ed51c4ef2 100644 --- a/paddle/gserver/layers/SequenceSliceLayer.cpp +++ b/paddle/legacy/gserver/layers/SequenceSliceLayer.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SequenceToBatch.cpp b/paddle/legacy/gserver/layers/SequenceToBatch.cpp similarity index 100% rename from paddle/gserver/layers/SequenceToBatch.cpp rename to paddle/legacy/gserver/layers/SequenceToBatch.cpp diff --git a/paddle/gserver/layers/SequenceToBatch.h b/paddle/legacy/gserver/layers/SequenceToBatch.h similarity index 98% rename from paddle/gserver/layers/SequenceToBatch.h rename to paddle/legacy/gserver/layers/SequenceToBatch.h index 5200e702d9..7ed517937d 100644 --- a/paddle/gserver/layers/SequenceToBatch.h +++ b/paddle/legacy/gserver/layers/SequenceToBatch.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { diff --git a/paddle/gserver/layers/SliceProjection.cpp b/paddle/legacy/gserver/layers/SliceProjection.cpp similarity index 100% rename from paddle/gserver/layers/SliceProjection.cpp rename to paddle/legacy/gserver/layers/SliceProjection.cpp diff --git a/paddle/gserver/layers/SlopeInterceptLayer.cpp b/paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp similarity index 95% rename from paddle/gserver/layers/SlopeInterceptLayer.cpp rename to paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp index f7f4735c1b..9168fd7dda 100644 --- a/paddle/gserver/layers/SlopeInterceptLayer.cpp +++ b/paddle/legacy/gserver/layers/SlopeInterceptLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.cpp b/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.cpp similarity index 100% rename from paddle/gserver/layers/SpatialPyramidPoolLayer.cpp rename to paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.cpp diff --git a/paddle/gserver/layers/SpatialPyramidPoolLayer.h b/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h similarity index 95% rename from paddle/gserver/layers/SpatialPyramidPoolLayer.h rename to paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h index 421bdfe09c..6d8ed9c878 100644 --- a/paddle/gserver/layers/SpatialPyramidPoolLayer.h +++ b/paddle/legacy/gserver/layers/SpatialPyramidPoolLayer.h @@ -16,8 +16,8 @@ limitations under the License. */ #include "Layer.h" #include "PoolProjection.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { /** diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp similarity index 97% rename from paddle/gserver/layers/SubNestedSequenceLayer.cpp rename to paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp index e2bb00bbfa..f363c2ac8d 100644 --- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp +++ b/paddle/legacy/gserver/layers/SubNestedSequenceLayer.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SubSequenceLayer.cpp b/paddle/legacy/gserver/layers/SubSequenceLayer.cpp similarity index 97% rename from paddle/gserver/layers/SubSequenceLayer.cpp rename to paddle/legacy/gserver/layers/SubSequenceLayer.cpp index ba49f5710f..36796f0473 100644 --- a/paddle/gserver/layers/SubSequenceLayer.cpp +++ b/paddle/legacy/gserver/layers/SubSequenceLayer.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SumToOneNormLayer.cpp b/paddle/legacy/gserver/layers/SumToOneNormLayer.cpp similarity index 96% rename from paddle/gserver/layers/SumToOneNormLayer.cpp rename to paddle/legacy/gserver/layers/SumToOneNormLayer.cpp index 00764717e8..410f4dd7c9 100644 --- a/paddle/gserver/layers/SumToOneNormLayer.cpp +++ b/paddle/legacy/gserver/layers/SumToOneNormLayer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SwitchOrderLayer.cpp b/paddle/legacy/gserver/layers/SwitchOrderLayer.cpp similarity index 98% rename from paddle/gserver/layers/SwitchOrderLayer.cpp rename to paddle/legacy/gserver/layers/SwitchOrderLayer.cpp index 704735de38..513f3df7bc 100644 --- a/paddle/gserver/layers/SwitchOrderLayer.cpp +++ b/paddle/legacy/gserver/layers/SwitchOrderLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "SwitchOrderLayer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/SwitchOrderLayer.h b/paddle/legacy/gserver/layers/SwitchOrderLayer.h similarity index 100% rename from paddle/gserver/layers/SwitchOrderLayer.h rename to paddle/legacy/gserver/layers/SwitchOrderLayer.h diff --git a/paddle/gserver/layers/TableProjection.cpp b/paddle/legacy/gserver/layers/TableProjection.cpp similarity index 100% rename from paddle/gserver/layers/TableProjection.cpp rename to paddle/legacy/gserver/layers/TableProjection.cpp diff --git a/paddle/gserver/layers/TableProjection.h b/paddle/legacy/gserver/layers/TableProjection.h similarity index 100% rename from paddle/gserver/layers/TableProjection.h rename to paddle/legacy/gserver/layers/TableProjection.h diff --git a/paddle/gserver/layers/TensorLayer.cpp b/paddle/legacy/gserver/layers/TensorLayer.cpp similarity index 98% rename from paddle/gserver/layers/TensorLayer.cpp rename to paddle/legacy/gserver/layers/TensorLayer.cpp index b2271c63ef..7f874bce0f 100644 --- a/paddle/gserver/layers/TensorLayer.cpp +++ b/paddle/legacy/gserver/layers/TensorLayer.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "TensorLayer.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/TensorLayer.h b/paddle/legacy/gserver/layers/TensorLayer.h similarity index 94% rename from paddle/gserver/layers/TensorLayer.h rename to paddle/legacy/gserver/layers/TensorLayer.h index 5c1ee40ced..fc491a7c9f 100644 --- a/paddle/gserver/layers/TensorLayer.h +++ b/paddle/legacy/gserver/layers/TensorLayer.h @@ -15,8 +15,8 @@ limitations under the License. */ #pragma once #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/gserver/layers/TransLayer.cpp b/paddle/legacy/gserver/layers/TransLayer.cpp similarity index 97% rename from paddle/gserver/layers/TransLayer.cpp rename to paddle/legacy/gserver/layers/TransLayer.cpp index cf87ca53d1..fd1d435ea5 100644 --- a/paddle/gserver/layers/TransLayer.cpp +++ b/paddle/legacy/gserver/layers/TransLayer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "TransLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { REGISTER_LAYER(trans, TransLayer); diff --git a/paddle/gserver/layers/TransLayer.h b/paddle/legacy/gserver/layers/TransLayer.h similarity index 96% rename from paddle/gserver/layers/TransLayer.h rename to paddle/legacy/gserver/layers/TransLayer.h index 1cd8fd91f7..0a6b13933f 100644 --- a/paddle/gserver/layers/TransLayer.h +++ b/paddle/legacy/gserver/layers/TransLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "Layer.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { /** diff --git a/paddle/gserver/layers/TransposedFullMatrixProjection.cpp b/paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp similarity index 98% rename from paddle/gserver/layers/TransposedFullMatrixProjection.cpp rename to paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp index 45f5977989..c8533dc7d7 100644 --- a/paddle/gserver/layers/TransposedFullMatrixProjection.cpp +++ b/paddle/legacy/gserver/layers/TransposedFullMatrixProjection.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Projection.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/UpsampleLayer.cpp b/paddle/legacy/gserver/layers/UpsampleLayer.cpp similarity index 100% rename from paddle/gserver/layers/UpsampleLayer.cpp rename to paddle/legacy/gserver/layers/UpsampleLayer.cpp diff --git a/paddle/gserver/layers/UpsampleLayer.h b/paddle/legacy/gserver/layers/UpsampleLayer.h similarity index 92% rename from paddle/gserver/layers/UpsampleLayer.h rename to paddle/legacy/gserver/layers/UpsampleLayer.h index c9d079c314..2fe5938244 100644 --- a/paddle/gserver/layers/UpsampleLayer.h +++ b/paddle/legacy/gserver/layers/UpsampleLayer.h @@ -16,9 +16,9 @@ limitations under the License. */ #include #include "Layer.h" -#include "paddle/math/Matrix.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Stat.h" namespace paddle { diff --git a/paddle/gserver/layers/ValidationLayer.cpp b/paddle/legacy/gserver/layers/ValidationLayer.cpp similarity index 99% rename from paddle/gserver/layers/ValidationLayer.cpp rename to paddle/legacy/gserver/layers/ValidationLayer.cpp index b626825a7b..9956fd2ed4 100644 --- a/paddle/gserver/layers/ValidationLayer.cpp +++ b/paddle/legacy/gserver/layers/ValidationLayer.cpp @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "ValidationLayer.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/gserver/layers/ValidationLayer.h b/paddle/legacy/gserver/layers/ValidationLayer.h similarity index 97% rename from paddle/gserver/layers/ValidationLayer.h rename to paddle/legacy/gserver/layers/ValidationLayer.h index be41128ef4..fbc94e8ef5 100644 --- a/paddle/gserver/layers/ValidationLayer.h +++ b/paddle/legacy/gserver/layers/ValidationLayer.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "Layer.h" -#include "paddle/gserver/evaluators/Evaluator.h" +#include "paddle/legacy/gserver/evaluators/Evaluator.h" DECLARE_int32(trainer_id); diff --git a/paddle/gserver/layers/WarpCTCLayer.cpp b/paddle/legacy/gserver/layers/WarpCTCLayer.cpp similarity index 100% rename from paddle/gserver/layers/WarpCTCLayer.cpp rename to paddle/legacy/gserver/layers/WarpCTCLayer.cpp diff --git a/paddle/gserver/layers/WarpCTCLayer.h b/paddle/legacy/gserver/layers/WarpCTCLayer.h similarity index 100% rename from paddle/gserver/layers/WarpCTCLayer.h rename to paddle/legacy/gserver/layers/WarpCTCLayer.h diff --git a/paddle/gserver/tests/.gitignore b/paddle/legacy/gserver/tests/.gitignore similarity index 100% rename from paddle/gserver/tests/.gitignore rename to paddle/legacy/gserver/tests/.gitignore diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/legacy/gserver/tests/CMakeLists.txt similarity index 97% rename from paddle/gserver/tests/CMakeLists.txt rename to paddle/legacy/gserver/tests/CMakeLists.txt index 9d7cad7584..93ddf5aa23 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/legacy/gserver/tests/CMakeLists.txt @@ -36,7 +36,7 @@ gserver_test(test_Upsample) set(PYTHON_PATH ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d - ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/gserver/tests) + ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/legacy/gserver/tests) function(gserver_test_with_python TARGET) add_unittest_without_exec(${TARGET} ${TARGET}.cpp) add_test(NAME ${TARGET} diff --git a/paddle/gserver/tests/LayerGradUtil.cpp b/paddle/legacy/gserver/tests/LayerGradUtil.cpp similarity index 100% rename from paddle/gserver/tests/LayerGradUtil.cpp rename to paddle/legacy/gserver/tests/LayerGradUtil.cpp diff --git a/paddle/gserver/tests/LayerGradUtil.h b/paddle/legacy/gserver/tests/LayerGradUtil.h similarity index 99% rename from paddle/gserver/tests/LayerGradUtil.h rename to paddle/legacy/gserver/tests/LayerGradUtil.h index 1999b2204b..941989a1da 100644 --- a/paddle/gserver/tests/LayerGradUtil.h +++ b/paddle/legacy/gserver/tests/LayerGradUtil.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" #include "paddle/testing/TestUtil.h" using namespace std; // NOLINT diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/legacy/gserver/tests/MKLDNNTester.cpp similarity index 99% rename from paddle/gserver/tests/MKLDNNTester.cpp rename to paddle/legacy/gserver/tests/MKLDNNTester.cpp index d2a9761a4e..b550ba9c72 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/legacy/gserver/tests/MKLDNNTester.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MKLDNNTester.h" -#include "paddle/gserver/layers/MKLDNNBase.h" -#include "paddle/gserver/layers/MKLDNNLayer.h" -#include "paddle/trainer/Trainer.h" +#include "paddle/legacy/gserver/layers/MKLDNNBase.h" +#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" +#include "paddle/legacy/trainer/Trainer.h" namespace paddle { diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/legacy/gserver/tests/MKLDNNTester.h similarity index 97% rename from paddle/gserver/tests/MKLDNNTester.h rename to paddle/legacy/gserver/tests/MKLDNNTester.h index 41ac46b70a..086846ce53 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/legacy/gserver/tests/MKLDNNTester.h @@ -17,8 +17,8 @@ limitations under the License. */ #include #include #include "LayerGradUtil.h" -#include "paddle/gserver/layers/MKLDNNBase.h" -#include "paddle/gserver/layers/MKLDNNLayer.h" +#include "paddle/legacy/gserver/layers/MKLDNNBase.h" +#include "paddle/legacy/gserver/layers/MKLDNNLayer.h" namespace paddle { diff --git a/paddle/gserver/tests/Sequence/dummy.list b/paddle/legacy/gserver/tests/Sequence/dummy.list similarity index 100% rename from paddle/gserver/tests/Sequence/dummy.list rename to paddle/legacy/gserver/tests/Sequence/dummy.list diff --git a/paddle/gserver/tests/Sequence/tour_dict_phrase.dict b/paddle/legacy/gserver/tests/Sequence/tour_dict_phrase.dict similarity index 100% rename from paddle/gserver/tests/Sequence/tour_dict_phrase.dict rename to paddle/legacy/gserver/tests/Sequence/tour_dict_phrase.dict diff --git a/paddle/gserver/tests/Sequence/tour_train_wdseg b/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg similarity index 100% rename from paddle/gserver/tests/Sequence/tour_train_wdseg rename to paddle/legacy/gserver/tests/Sequence/tour_train_wdseg diff --git a/paddle/gserver/tests/Sequence/tour_train_wdseg.nest b/paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest similarity index 100% rename from paddle/gserver/tests/Sequence/tour_train_wdseg.nest rename to paddle/legacy/gserver/tests/Sequence/tour_train_wdseg.nest diff --git a/paddle/legacy/gserver/tests/Sequence/train.list b/paddle/legacy/gserver/tests/Sequence/train.list new file mode 100644 index 0000000000..1109a24492 --- /dev/null +++ b/paddle/legacy/gserver/tests/Sequence/train.list @@ -0,0 +1 @@ +legacy/gserver/tests/Sequence/tour_train_wdseg diff --git a/paddle/legacy/gserver/tests/Sequence/train.list.nest b/paddle/legacy/gserver/tests/Sequence/train.list.nest new file mode 100644 index 0000000000..a67df35024 --- /dev/null +++ b/paddle/legacy/gserver/tests/Sequence/train.list.nest @@ -0,0 +1 @@ +legacy/gserver/tests/Sequence/tour_train_wdseg.nest diff --git a/paddle/gserver/tests/__init__.py b/paddle/legacy/gserver/tests/__init__.py similarity index 100% rename from paddle/gserver/tests/__init__.py rename to paddle/legacy/gserver/tests/__init__.py diff --git a/paddle/gserver/tests/concat_dotmul_a.conf b/paddle/legacy/gserver/tests/concat_dotmul_a.conf similarity index 100% rename from paddle/gserver/tests/concat_dotmul_a.conf rename to paddle/legacy/gserver/tests/concat_dotmul_a.conf diff --git a/paddle/gserver/tests/concat_dotmul_b.conf b/paddle/legacy/gserver/tests/concat_dotmul_b.conf similarity index 100% rename from paddle/gserver/tests/concat_dotmul_b.conf rename to paddle/legacy/gserver/tests/concat_dotmul_b.conf diff --git a/paddle/gserver/tests/concat_fullmatrix_a.conf b/paddle/legacy/gserver/tests/concat_fullmatrix_a.conf similarity index 100% rename from paddle/gserver/tests/concat_fullmatrix_a.conf rename to paddle/legacy/gserver/tests/concat_fullmatrix_a.conf diff --git a/paddle/gserver/tests/concat_fullmatrix_b.conf b/paddle/legacy/gserver/tests/concat_fullmatrix_b.conf similarity index 100% rename from paddle/gserver/tests/concat_fullmatrix_b.conf rename to paddle/legacy/gserver/tests/concat_fullmatrix_b.conf diff --git a/paddle/gserver/tests/concat_slice_a.conf b/paddle/legacy/gserver/tests/concat_slice_a.conf similarity index 100% rename from paddle/gserver/tests/concat_slice_a.conf rename to paddle/legacy/gserver/tests/concat_slice_a.conf diff --git a/paddle/gserver/tests/concat_slice_b.conf b/paddle/legacy/gserver/tests/concat_slice_b.conf similarity index 100% rename from paddle/gserver/tests/concat_slice_b.conf rename to paddle/legacy/gserver/tests/concat_slice_b.conf diff --git a/paddle/gserver/tests/concat_table_a.conf b/paddle/legacy/gserver/tests/concat_table_a.conf similarity index 100% rename from paddle/gserver/tests/concat_table_a.conf rename to paddle/legacy/gserver/tests/concat_table_a.conf diff --git a/paddle/gserver/tests/concat_table_b.conf b/paddle/legacy/gserver/tests/concat_table_b.conf similarity index 100% rename from paddle/gserver/tests/concat_table_b.conf rename to paddle/legacy/gserver/tests/concat_table_b.conf diff --git a/paddle/gserver/tests/img_conv_a.conf b/paddle/legacy/gserver/tests/img_conv_a.conf similarity index 100% rename from paddle/gserver/tests/img_conv_a.conf rename to paddle/legacy/gserver/tests/img_conv_a.conf diff --git a/paddle/gserver/tests/img_conv_b.conf b/paddle/legacy/gserver/tests/img_conv_b.conf similarity index 100% rename from paddle/gserver/tests/img_conv_b.conf rename to paddle/legacy/gserver/tests/img_conv_b.conf diff --git a/paddle/gserver/tests/img_conv_c.conf b/paddle/legacy/gserver/tests/img_conv_c.conf similarity index 100% rename from paddle/gserver/tests/img_conv_c.conf rename to paddle/legacy/gserver/tests/img_conv_c.conf diff --git a/paddle/gserver/tests/img_conv_cudnn.py b/paddle/legacy/gserver/tests/img_conv_cudnn.py similarity index 100% rename from paddle/gserver/tests/img_conv_cudnn.py rename to paddle/legacy/gserver/tests/img_conv_cudnn.py diff --git a/paddle/gserver/tests/img_conv_exconv.py b/paddle/legacy/gserver/tests/img_conv_exconv.py similarity index 100% rename from paddle/gserver/tests/img_conv_exconv.py rename to paddle/legacy/gserver/tests/img_conv_exconv.py diff --git a/paddle/gserver/tests/img_pool_a.conf b/paddle/legacy/gserver/tests/img_pool_a.conf similarity index 100% rename from paddle/gserver/tests/img_pool_a.conf rename to paddle/legacy/gserver/tests/img_pool_a.conf diff --git a/paddle/gserver/tests/img_pool_b.conf b/paddle/legacy/gserver/tests/img_pool_b.conf similarity index 100% rename from paddle/gserver/tests/img_pool_b.conf rename to paddle/legacy/gserver/tests/img_pool_b.conf diff --git a/paddle/gserver/tests/mkldnn_branch_net.conf b/paddle/legacy/gserver/tests/mkldnn_branch_net.conf similarity index 100% rename from paddle/gserver/tests/mkldnn_branch_net.conf rename to paddle/legacy/gserver/tests/mkldnn_branch_net.conf diff --git a/paddle/gserver/tests/mkldnn_simple_net.conf b/paddle/legacy/gserver/tests/mkldnn_simple_net.conf similarity index 100% rename from paddle/gserver/tests/mkldnn_simple_net.conf rename to paddle/legacy/gserver/tests/mkldnn_simple_net.conf diff --git a/paddle/gserver/tests/pyDataProvider.py b/paddle/legacy/gserver/tests/pyDataProvider.py similarity index 100% rename from paddle/gserver/tests/pyDataProvider.py rename to paddle/legacy/gserver/tests/pyDataProvider.py diff --git a/paddle/gserver/tests/pyDataProvider/pyDataProviderList b/paddle/legacy/gserver/tests/pyDataProvider/pyDataProviderList similarity index 100% rename from paddle/gserver/tests/pyDataProvider/pyDataProviderList rename to paddle/legacy/gserver/tests/pyDataProvider/pyDataProviderList diff --git a/paddle/gserver/tests/pyDataProvider/trainer.conf b/paddle/legacy/gserver/tests/pyDataProvider/trainer.conf similarity index 100% rename from paddle/gserver/tests/pyDataProvider/trainer.conf rename to paddle/legacy/gserver/tests/pyDataProvider/trainer.conf diff --git a/paddle/gserver/tests/rnn_data_provider.py b/paddle/legacy/gserver/tests/rnn_data_provider.py similarity index 100% rename from paddle/gserver/tests/rnn_data_provider.py rename to paddle/legacy/gserver/tests/rnn_data_provider.py diff --git a/paddle/gserver/tests/sequenceGen.py b/paddle/legacy/gserver/tests/sequenceGen.py similarity index 100% rename from paddle/gserver/tests/sequenceGen.py rename to paddle/legacy/gserver/tests/sequenceGen.py diff --git a/paddle/gserver/tests/sequence_layer_group.conf b/paddle/legacy/gserver/tests/sequence_layer_group.conf similarity index 93% rename from paddle/gserver/tests/sequence_layer_group.conf rename to paddle/legacy/gserver/tests/sequence_layer_group.conf index 50f2d89d02..ad1b61d582 100644 --- a/paddle/gserver/tests/sequence_layer_group.conf +++ b/paddle/legacy/gserver/tests/sequence_layer_group.conf @@ -16,13 +16,13 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count define_py_data_sources2( - train_list='gserver/tests/Sequence/train.list', + train_list='legacy/gserver/tests/Sequence/train.list', test_list=None, module='sequenceGen', obj='process', diff --git a/paddle/gserver/tests/sequence_lstm.conf b/paddle/legacy/gserver/tests/sequence_lstm.conf similarity index 93% rename from paddle/gserver/tests/sequence_lstm.conf rename to paddle/legacy/gserver/tests/sequence_lstm.conf index f49a827f22..6ab70e7071 100644 --- a/paddle/gserver/tests/sequence_lstm.conf +++ b/paddle/legacy/gserver/tests/sequence_lstm.conf @@ -16,13 +16,13 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count define_py_data_sources2( - train_list='gserver/tests/Sequence/train.list', + train_list='legacy/gserver/tests/Sequence/train.list', test_list=None, module='sequenceGen', obj='process', diff --git a/paddle/gserver/tests/sequence_nest_layer_group.conf b/paddle/legacy/gserver/tests/sequence_nest_layer_group.conf similarity index 95% rename from paddle/gserver/tests/sequence_nest_layer_group.conf rename to paddle/legacy/gserver/tests/sequence_nest_layer_group.conf index 71ef53d08a..75c36b1189 100644 --- a/paddle/gserver/tests/sequence_nest_layer_group.conf +++ b/paddle/legacy/gserver/tests/sequence_nest_layer_group.conf @@ -16,13 +16,13 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count define_py_data_sources2( - train_list='gserver/tests/Sequence/train.list.nest', + train_list='legacy/gserver/tests/Sequence/train.list.nest', test_list=None, module='sequenceGen', obj='process2', diff --git a/paddle/gserver/tests/sequence_nest_rnn.conf b/paddle/legacy/gserver/tests/sequence_nest_rnn.conf similarity index 96% rename from paddle/gserver/tests/sequence_nest_rnn.conf rename to paddle/legacy/gserver/tests/sequence_nest_rnn.conf index 2873a59966..bc3b22c2a9 100644 --- a/paddle/gserver/tests/sequence_nest_rnn.conf +++ b/paddle/legacy/gserver/tests/sequence_nest_rnn.conf @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', +define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_subseq') diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf similarity index 97% rename from paddle/gserver/tests/sequence_nest_rnn_multi_input.conf rename to paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf index afdacfffd7..165ab22989 100644 --- a/paddle/gserver/tests/sequence_nest_rnn_multi_input.conf +++ b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_input.conf @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', +define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_subseq') diff --git a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py similarity index 98% rename from paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py rename to paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py index 569d3c094b..9a48b7f25c 100644 --- a/paddle/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py +++ b/paddle/legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py @@ -15,7 +15,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ define_py_data_sources2( - train_list='gserver/tests/Sequence/dummy.list', + train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_unequalength_subseq') diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/legacy/gserver/tests/sequence_recurrent.py similarity index 93% rename from paddle/gserver/tests/sequence_recurrent.py rename to paddle/legacy/gserver/tests/sequence_recurrent.py index b88c09084e..e2c6a7935c 100644 --- a/paddle/gserver/tests/sequence_recurrent.py +++ b/paddle/legacy/gserver/tests/sequence_recurrent.py @@ -15,13 +15,13 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count define_py_data_sources2( - train_list='gserver/tests/Sequence/train.list', + train_list='legacy/gserver/tests/Sequence/train.list', test_list=None, module='sequenceGen', obj='process', diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/legacy/gserver/tests/sequence_recurrent_group.py similarity index 94% rename from paddle/gserver/tests/sequence_recurrent_group.py rename to paddle/legacy/gserver/tests/sequence_recurrent_group.py index 0daf746700..b4638bd907 100644 --- a/paddle/gserver/tests/sequence_recurrent_group.py +++ b/paddle/legacy/gserver/tests/sequence_recurrent_group.py @@ -14,13 +14,13 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict' +dict_path = 'legacy/gserver/tests/Sequence/tour_dict_phrase.dict' dict_file = dict() for line_count, line in enumerate(open(dict_path, "r")): dict_file[line.strip()] = line_count define_py_data_sources2( - train_list='gserver/tests/Sequence/train.list', + train_list='legacy/gserver/tests/Sequence/train.list', test_list=None, module='sequenceGen', obj='process', diff --git a/paddle/gserver/tests/sequence_rnn.conf b/paddle/legacy/gserver/tests/sequence_rnn.conf similarity index 95% rename from paddle/gserver/tests/sequence_rnn.conf rename to paddle/legacy/gserver/tests/sequence_rnn.conf index 1084edfe70..3133595c9c 100644 --- a/paddle/gserver/tests/sequence_rnn.conf +++ b/paddle/legacy/gserver/tests/sequence_rnn.conf @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', +define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_seq') diff --git a/paddle/gserver/tests/sequence_rnn_matched_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py similarity index 97% rename from paddle/gserver/tests/sequence_rnn_matched_inputs.py rename to paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py index 41a581e0cc..921cef04dd 100644 --- a/paddle/gserver/tests/sequence_rnn_matched_inputs.py +++ b/paddle/legacy/gserver/tests/sequence_rnn_matched_inputs.py @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ define_py_data_sources2( - train_list='gserver/tests/Sequence/dummy.list', + train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_mixed') diff --git a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py similarity index 97% rename from paddle/gserver/tests/sequence_rnn_mixed_inputs.py rename to paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py index ae89d8e2bb..c7bcaf6c4b 100644 --- a/paddle/gserver/tests/sequence_rnn_mixed_inputs.py +++ b/paddle/legacy/gserver/tests/sequence_rnn_mixed_inputs.py @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ define_py_data_sources2( - train_list='gserver/tests/Sequence/dummy.list', + train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_mixed') diff --git a/paddle/gserver/tests/sequence_rnn_multi_input.conf b/paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf similarity index 95% rename from paddle/gserver/tests/sequence_rnn_multi_input.conf rename to paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf index 9fae974f30..bf4be779a2 100644 --- a/paddle/gserver/tests/sequence_rnn_multi_input.conf +++ b/paddle/legacy/gserver/tests/sequence_rnn_multi_input.conf @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ -define_py_data_sources2(train_list='gserver/tests/Sequence/dummy.list', +define_py_data_sources2(train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_seq') diff --git a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py b/paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py similarity index 97% rename from paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py rename to paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py index 6473fb3f3e..3612b49c22 100644 --- a/paddle/gserver/tests/sequence_rnn_multi_unequalength_inputs.py +++ b/paddle/legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * ######################## data source ################################ define_py_data_sources2( - train_list='gserver/tests/Sequence/dummy.list', + train_list='legacy/gserver/tests/Sequence/dummy.list', test_list=None, module='rnn_data_provider', obj='process_unequalength_seq') diff --git a/paddle/gserver/tests/test_ActivationGrad.cpp b/paddle/legacy/gserver/tests/test_ActivationGrad.cpp similarity index 98% rename from paddle/gserver/tests/test_ActivationGrad.cpp rename to paddle/legacy/gserver/tests/test_ActivationGrad.cpp index b5e4af26dc..f468d229a8 100644 --- a/paddle/gserver/tests/test_ActivationGrad.cpp +++ b/paddle/legacy/gserver/tests/test_ActivationGrad.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_BatchNorm.cpp b/paddle/legacy/gserver/tests/test_BatchNorm.cpp similarity index 96% rename from paddle/gserver/tests/test_BatchNorm.cpp rename to paddle/legacy/gserver/tests/test_BatchNorm.cpp index a3ec66c758..e21fa16074 100644 --- a/paddle/gserver/tests/test_BatchNorm.cpp +++ b/paddle/legacy/gserver/tests/test_BatchNorm.cpp @@ -16,12 +16,12 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/utils/GlobalConstants.h" #include "LayerGradUtil.h" -#include "paddle/cuda/include/hl_batch_norm.h" -#include "paddle/math/tests/TensorCheck.h" +#include "paddle/legacy/cuda/include/hl_batch_norm.h" +#include "paddle/legacy/math/tests/TensorCheck.h" #include "paddle/testing/TestUtil.h" using namespace paddle; // NOLINT diff --git a/paddle/gserver/tests/test_CRFLayerGrad.cpp b/paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp similarity index 97% rename from paddle/gserver/tests/test_CRFLayerGrad.cpp rename to paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp index 9f3d293656..1dafd1de4d 100644 --- a/paddle/gserver/tests/test_CRFLayerGrad.cpp +++ b/paddle/legacy/gserver/tests/test_CRFLayerGrad.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/gserver/layers/LinearChainCRF.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/LinearChainCRF.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_CompareSparse.cpp b/paddle/legacy/gserver/tests/test_CompareSparse.cpp similarity index 97% rename from paddle/gserver/tests/test_CompareSparse.cpp rename to paddle/legacy/gserver/tests/test_CompareSparse.cpp index 2fbc404125..11b633a588 100644 --- a/paddle/gserver/tests/test_CompareSparse.cpp +++ b/paddle/legacy/gserver/tests/test_CompareSparse.cpp @@ -12,17 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include -#include "paddle/trainer/Trainer.h" +#include "paddle/legacy/trainer/Trainer.h" #include -#include +#include using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& configFile1 = "gserver/tests/sequence_lstm.conf"; +static const string& configFile1 = "legacy/gserver/tests/sequence_lstm.conf"; DECLARE_bool(use_gpu); DECLARE_string(config); diff --git a/paddle/gserver/tests/test_CompareTwoNets.cpp b/paddle/legacy/gserver/tests/test_CompareTwoNets.cpp similarity index 96% rename from paddle/gserver/tests/test_CompareTwoNets.cpp rename to paddle/legacy/gserver/tests/test_CompareTwoNets.cpp index 1c9b4002a3..e19c34abbd 100644 --- a/paddle/gserver/tests/test_CompareTwoNets.cpp +++ b/paddle/legacy/gserver/tests/test_CompareTwoNets.cpp @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include #include -#include "paddle/trainer/Trainer.h" +#include "paddle/legacy/trainer/Trainer.h" using namespace paddle; // NOLINT using namespace std; // NOLINT @@ -40,9 +40,10 @@ DEFINE_double( DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_int32(seed); -static const string& config_file_a = "gserver/tests/sequence_recurrent.py"; +static const string& config_file_a = + "legacy/gserver/tests/sequence_recurrent.py"; static const string& config_file_b = - "gserver/tests/sequence_recurrent_group.py"; + "legacy/gserver/tests/sequence_recurrent_group.py"; struct ComData { vector outArgs; diff --git a/paddle/gserver/tests/test_ConvTrans.cpp b/paddle/legacy/gserver/tests/test_ConvTrans.cpp similarity index 98% rename from paddle/gserver/tests/test_ConvTrans.cpp rename to paddle/legacy/gserver/tests/test_ConvTrans.cpp index 2e394a74b7..4ea0a3d379 100644 --- a/paddle/gserver/tests/test_ConvTrans.cpp +++ b/paddle/legacy/gserver/tests/test_ConvTrans.cpp @@ -16,9 +16,9 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/GlobalConstants.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_ConvUnify.cpp b/paddle/legacy/gserver/tests/test_ConvUnify.cpp similarity index 98% rename from paddle/gserver/tests/test_ConvUnify.cpp rename to paddle/legacy/gserver/tests/test_ConvUnify.cpp index ba820d9a2a..d4ca158352 100644 --- a/paddle/gserver/tests/test_ConvUnify.cpp +++ b/paddle/legacy/gserver/tests/test_ConvUnify.cpp @@ -16,9 +16,9 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/GlobalConstants.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp b/paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp similarity index 99% rename from paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp rename to paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp index 0041ed3093..34eb0dedff 100644 --- a/paddle/gserver/tests/test_CrossEntropyOverBeamGrad.cpp +++ b/paddle/legacy/gserver/tests/test_CrossEntropyOverBeamGrad.cpp @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_DetectionOutput.cpp b/paddle/legacy/gserver/tests/test_DetectionOutput.cpp similarity index 100% rename from paddle/gserver/tests/test_DetectionOutput.cpp rename to paddle/legacy/gserver/tests/test_DetectionOutput.cpp diff --git a/paddle/gserver/tests/test_Evaluator.cpp b/paddle/legacy/gserver/tests/test_Evaluator.cpp similarity index 99% rename from paddle/gserver/tests/test_Evaluator.cpp rename to paddle/legacy/gserver/tests/test_Evaluator.cpp index 4a8843f3af..8aab50d23e 100644 --- a/paddle/gserver/tests/test_Evaluator.cpp +++ b/paddle/legacy/gserver/tests/test_Evaluator.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" +#include "paddle/legacy/trainer/Trainer.h" #include "paddle/testing/TestUtil.h" -#include "paddle/trainer/Trainer.h" using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/gserver/tests/test_Expand.cpp b/paddle/legacy/gserver/tests/test_Expand.cpp similarity index 100% rename from paddle/gserver/tests/test_Expand.cpp rename to paddle/legacy/gserver/tests/test_Expand.cpp diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp similarity index 98% rename from paddle/gserver/tests/test_KmaxSeqScore.cpp rename to paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp index 168ffbdac8..e15b4e5038 100644 --- a/paddle/gserver/tests/test_KmaxSeqScore.cpp +++ b/paddle/legacy/gserver/tests/test_KmaxSeqScore.cpp @@ -17,8 +17,8 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/utils/GlobalConstants.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/legacy/gserver/tests/test_LayerGrad.cpp similarity index 99% rename from paddle/gserver/tests/test_LayerGrad.cpp rename to paddle/legacy/gserver/tests/test_LayerGrad.cpp index 1254d58050..979cf8ee67 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/legacy/gserver/tests/test_LayerGrad.cpp @@ -19,8 +19,8 @@ limitations under the License. */ #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/math/MathUtils.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_LinearChainCRF.cpp b/paddle/legacy/gserver/tests/test_LinearChainCRF.cpp similarity index 95% rename from paddle/gserver/tests/test_LinearChainCRF.cpp rename to paddle/legacy/gserver/tests/test_LinearChainCRF.cpp index 423c31e27d..7082c1363a 100644 --- a/paddle/gserver/tests/test_LinearChainCRF.cpp +++ b/paddle/legacy/gserver/tests/test_LinearChainCRF.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "paddle/gserver/layers/LinearChainCRF.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/layers/LinearChainCRF.h" +#include "paddle/legacy/utils/Util.h" using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/gserver/tests/test_MKLDNN.cpp b/paddle/legacy/gserver/tests/test_MKLDNN.cpp similarity index 98% rename from paddle/gserver/tests/test_MKLDNN.cpp rename to paddle/legacy/gserver/tests/test_MKLDNN.cpp index a34a3f6206..c79ccd1956 100644 --- a/paddle/gserver/tests/test_MKLDNN.cpp +++ b/paddle/legacy/gserver/tests/test_MKLDNN.cpp @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include #include #include "MKLDNNTester.h" #include "ModelConfig.pb.h" -#include "paddle/gserver/activations/MKLDNNActivation.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/gserver/activations/MKLDNNActivation.h" +#include "paddle/legacy/math/MathUtils.h" using namespace paddle; // NOLINT @@ -426,7 +426,7 @@ DECLARE_string(config_args); TEST(MKLDNNNet, net) { std::vector cases = {"simple", "branch"}; for (auto name : cases) { - std::string config = "./gserver/tests/mkldnn_" + name + "_net.conf"; + std::string config = "./legacy/gserver/tests/mkldnn_" + name + "_net.conf"; for (auto channels : {2, 32}) { std::ostringstream oss; oss << "channels=" << channels; diff --git a/paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp b/paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp similarity index 98% rename from paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp rename to paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp index 5188d2abed..2bc261b4a8 100644 --- a/paddle/gserver/tests/test_MaxPoolingWithMaskOutput.cpp +++ b/paddle/legacy/gserver/tests/test_MaxPoolingWithMaskOutput.cpp @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "LayerGradUtil.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" #include "paddle/testing/TestUtil.h" using namespace paddle; diff --git a/paddle/gserver/tests/test_MultinomialSampler.cpp b/paddle/legacy/gserver/tests/test_MultinomialSampler.cpp similarity index 96% rename from paddle/gserver/tests/test_MultinomialSampler.cpp rename to paddle/legacy/gserver/tests/test_MultinomialSampler.cpp index 043025239e..25b1a1191d 100644 --- a/paddle/gserver/tests/test_MultinomialSampler.cpp +++ b/paddle/legacy/gserver/tests/test_MultinomialSampler.cpp @@ -18,10 +18,10 @@ limitations under the License. */ #include #undef PADDLE_DISABLE_TIMER -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" -#include "paddle/gserver/layers/MultinomialSampler.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/layers/MultinomialSampler.h" +#include "paddle/legacy/utils/Util.h" using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/gserver/tests/test_NetworkCompare.cpp b/paddle/legacy/gserver/tests/test_NetworkCompare.cpp similarity index 88% rename from paddle/gserver/tests/test_NetworkCompare.cpp rename to paddle/legacy/gserver/tests/test_NetworkCompare.cpp index fda3f2f793..c9f9f3e61b 100644 --- a/paddle/gserver/tests/test_NetworkCompare.cpp +++ b/paddle/legacy/gserver/tests/test_NetworkCompare.cpp @@ -14,13 +14,13 @@ limitations under the License. */ #undef PADDLE_DISABLE_TIMER #include -#include +#include #include #include +#include "paddle/legacy/trainer/Trainer.h" +#include "paddle/legacy/utils/Stat.h" #include "paddle/testing/TestUtil.h" -#include "paddle/trainer/Trainer.h" -#include "paddle/utils/Stat.h" using namespace paddle; // NOLINT using namespace std; // NOLINT @@ -220,33 +220,33 @@ void compareNetwork(const std::string& config_file_a, } TEST(Compare, concat_dotmul) { - std::string config_file_a = "./gserver/tests/concat_dotmul_a.conf"; - std::string config_file_b = "./gserver/tests/concat_dotmul_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/concat_dotmul_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/concat_dotmul_b.conf"; compareNetwork(config_file_a, config_file_b); } TEST(Compare, concat_fullmatrix) { - std::string config_file_a = "./gserver/tests/concat_fullmatrix_a.conf"; - std::string config_file_b = "./gserver/tests/concat_fullmatrix_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/concat_fullmatrix_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/concat_fullmatrix_b.conf"; compareNetwork(config_file_a, config_file_b); } TEST(Compare, concat_table) { - std::string config_file_a = "./gserver/tests/concat_table_a.conf"; - std::string config_file_b = "./gserver/tests/concat_table_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/concat_table_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/concat_table_b.conf"; compareNetwork(config_file_a, config_file_b); } TEST(Compare, concat_slice) { - std::string config_file_a = "./gserver/tests/concat_slice_a.conf"; - std::string config_file_b = "./gserver/tests/concat_slice_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/concat_slice_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/concat_slice_b.conf"; compareNetwork(config_file_a, config_file_b); } #ifdef PADDLE_WITH_CUDA TEST(Compare, img_pool) { - std::string config_file_a = "./gserver/tests/img_pool_a.conf"; - std::string config_file_b = "./gserver/tests/img_pool_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/img_pool_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/img_pool_b.conf"; bool useGpu = FLAGS_use_gpu; FLAGS_use_gpu = true; compareNetwork(config_file_a, config_file_b); @@ -254,8 +254,8 @@ TEST(Compare, img_pool) { } TEST(Compare, img_conv) { - std::string config_file_a = "./gserver/tests/img_conv_a.conf"; - std::string config_file_b = "./gserver/tests/img_conv_b.conf"; + std::string config_file_a = "./legacy/gserver/tests/img_conv_a.conf"; + std::string config_file_b = "./legacy/gserver/tests/img_conv_b.conf"; bool useGpu = FLAGS_use_gpu; FLAGS_use_gpu = true; compareNetwork(config_file_a, config_file_b); @@ -264,8 +264,8 @@ TEST(Compare, img_conv) { // Test cudnn_conv and exconv give the same result TEST(Compare, img_conv2) { - std::string config_file_a = "./gserver/tests/img_conv_cudnn.py"; - std::string config_file_b = "./gserver/tests/img_conv_exconv.py"; + std::string config_file_a = "./legacy/gserver/tests/img_conv_cudnn.py"; + std::string config_file_b = "./legacy/gserver/tests/img_conv_exconv.py"; bool useGpu = FLAGS_use_gpu; double eps = FLAGS_checkgrad_eps; FLAGS_use_gpu = true; diff --git a/paddle/gserver/tests/test_PriorBox.cpp b/paddle/legacy/gserver/tests/test_PriorBox.cpp similarity index 100% rename from paddle/gserver/tests/test_PriorBox.cpp rename to paddle/legacy/gserver/tests/test_PriorBox.cpp diff --git a/paddle/gserver/tests/test_PyDataProvider.cpp b/paddle/legacy/gserver/tests/test_PyDataProvider.cpp similarity index 95% rename from paddle/gserver/tests/test_PyDataProvider.cpp rename to paddle/legacy/gserver/tests/test_PyDataProvider.cpp index a1dee97950..0209e6818a 100644 --- a/paddle/gserver/tests/test_PyDataProvider.cpp +++ b/paddle/legacy/gserver/tests/test_PyDataProvider.cpp @@ -17,8 +17,8 @@ limitations under the License. */ #include -#include "paddle/gserver/dataproviders/PyDataProvider.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/dataproviders/PyDataProvider.h" +#include "paddle/legacy/utils/Util.h" #include "paddle/testing/TestUtil.h" @@ -35,7 +35,8 @@ TEST(PyDataProvider, py_fill_slots) { config.set_load_data_module(std::string("pyDataProvider")); config.set_load_data_object(std::string("SimpleDataProvider")); config.clear_files(); - std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList"; + std::string dataFile = + "legacy/gserver/tests/pyDataProvider/pyDataProviderList"; config.set_files(dataFile); #ifndef PADDLE_WITH_CUDA bool useGpu = false; @@ -68,7 +69,8 @@ TEST(PyDataProvider, py_fill_nest_slots) { config.set_load_data_module(std::string("pyDataProvider")); config.set_load_data_object(std::string("SimpleNestDataProvider")); config.clear_files(); - std::string dataFile = "gserver/tests/pyDataProvider/pyDataProviderList"; + std::string dataFile = + "legacy/gserver/tests/pyDataProvider/pyDataProviderList"; config.set_files(dataFile); EXPECT_EQ(config.IsInitialized(), true); #ifndef PADDLE_WITH_CUDA diff --git a/paddle/gserver/tests/test_PyDataProvider2.cpp b/paddle/legacy/gserver/tests/test_PyDataProvider2.cpp similarity index 98% rename from paddle/gserver/tests/test_PyDataProvider2.cpp rename to paddle/legacy/gserver/tests/test_PyDataProvider2.cpp index b39fb35345..de313ba82c 100644 --- a/paddle/gserver/tests/test_PyDataProvider2.cpp +++ b/paddle/legacy/gserver/tests/test_PyDataProvider2.cpp @@ -15,9 +15,9 @@ limitations under the License. */ #ifndef PADDLE_NO_PYTHON #include #include -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Util.h" DEFINE_string(train_list, "unittest.list", "file list for unittest"); diff --git a/paddle/gserver/tests/test_PyDataProvider2.py b/paddle/legacy/gserver/tests/test_PyDataProvider2.py similarity index 100% rename from paddle/gserver/tests/test_PyDataProvider2.py rename to paddle/legacy/gserver/tests/test_PyDataProvider2.py diff --git a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp b/paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp similarity index 79% rename from paddle/gserver/tests/test_RecurrentGradientMachine.cpp rename to paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp index 9770567b88..153c3e7f36 100644 --- a/paddle/gserver/tests/test_RecurrentGradientMachine.cpp +++ b/paddle/legacy/gserver/tests/test_RecurrentGradientMachine.cpp @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include DECLARE_int32(seed); @@ -102,11 +102,11 @@ void test(const string& conf1, const string& conf2, double eps, bool useGpu) { FLAGS_use_gpu = useGpu; int num_passes = 5; real* cost1 = new real[num_passes]; - const string dir1 = "gserver/tests/t1"; + const string dir1 = "legacy/gserver/tests/t1"; CalCost(conf1, dir1, cost1, num_passes); real* cost2 = new real[num_passes]; - const string dir2 = "gserver/tests/t2"; + const string dir2 = "legacy/gserver/tests/t2"; CalCost(conf2, dir2, cost2, num_passes); for (int i = 0; i < num_passes; i++) { @@ -121,8 +121,8 @@ void test(const string& conf1, const string& conf2, double eps, bool useGpu) { TEST(RecurrentGradientMachine, HasSubSequence) { for (bool useGpu : {false, true}) { - test("gserver/tests/sequence_layer_group.conf", - "gserver/tests/sequence_nest_layer_group.conf", + test("legacy/gserver/tests/sequence_layer_group.conf", + "legacy/gserver/tests/sequence_nest_layer_group.conf", 1e-5, useGpu); } @@ -130,8 +130,8 @@ TEST(RecurrentGradientMachine, HasSubSequence) { TEST(RecurrentGradientMachine, rnn) { for (bool useGpu : {false, true}) { - test("gserver/tests/sequence_rnn.conf", - "gserver/tests/sequence_nest_rnn.conf", + test("legacy/gserver/tests/sequence_rnn.conf", + "legacy/gserver/tests/sequence_nest_rnn.conf", 1e-6, useGpu); } @@ -139,8 +139,8 @@ TEST(RecurrentGradientMachine, rnn) { TEST(RecurrentGradientMachine, rnn_multi_input) { for (bool useGpu : {false, true}) { - test("gserver/tests/sequence_rnn_multi_input.conf", - "gserver/tests/sequence_nest_rnn_multi_input.conf", + test("legacy/gserver/tests/sequence_rnn_multi_input.conf", + "legacy/gserver/tests/sequence_nest_rnn_multi_input.conf", 1e-6, useGpu); } @@ -148,8 +148,8 @@ TEST(RecurrentGradientMachine, rnn_multi_input) { TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) { for (bool useGpu : {false, true}) { - test("gserver/tests/sequence_rnn_multi_unequalength_inputs.py", - "gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py", + test("legacy/gserver/tests/sequence_rnn_multi_unequalength_inputs.py", + "legacy/gserver/tests/sequence_nest_rnn_multi_unequalength_inputs.py", 1e-6, useGpu); } @@ -157,8 +157,8 @@ TEST(RecurrentGradientMachine, rnn_multi_unequalength_input) { TEST(RecurrentGradientMachine, rnn_mixed_input) { for (bool useGpu : {false, true}) { - test("gserver/tests/sequence_rnn_mixed_inputs.py", - "gserver/tests/sequence_rnn_matched_inputs.py", + test("legacy/gserver/tests/sequence_rnn_mixed_inputs.py", + "legacy/gserver/tests/sequence_rnn_matched_inputs.py", 1e-6, useGpu); } diff --git a/paddle/gserver/tests/test_RecurrentLayer.cpp b/paddle/legacy/gserver/tests/test_RecurrentLayer.cpp similarity index 98% rename from paddle/gserver/tests/test_RecurrentLayer.cpp rename to paddle/legacy/gserver/tests/test_RecurrentLayer.cpp index b54e37b7db..71198cb6a1 100644 --- a/paddle/gserver/tests/test_RecurrentLayer.cpp +++ b/paddle/legacy/gserver/tests/test_RecurrentLayer.cpp @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/gserver/layers/Layer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/Layer.h" #include "paddle/testing/TestUtil.h" @@ -220,9 +220,9 @@ TEST(Layer, RecurrentLayer) { } #define protected public -#include "paddle/gserver/layers/GatedRecurrentLayer.h" -#include "paddle/gserver/layers/LstmLayer.h" -#include "paddle/gserver/layers/RecurrentLayer.h" +#include "paddle/legacy/gserver/layers/GatedRecurrentLayer.h" +#include "paddle/legacy/gserver/layers/LstmLayer.h" +#include "paddle/legacy/gserver/layers/RecurrentLayer.h" template class TestRecurrentLayer { public: @@ -423,7 +423,7 @@ TEST(Layer, LstmLayer) { #ifdef PADDLE_WITH_MKLML -#include "paddle/gserver/layers/MKLPackedRecurrentLayer.h" +#include "paddle/legacy/gserver/layers/MKLPackedRecurrentLayer.h" LayerPtr initMKLPackedLayer(LayerConfig layerConfig, bool reversed, diff --git a/paddle/gserver/tests/test_SelectiveFCLayer.cpp b/paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp similarity index 94% rename from paddle/gserver/tests/test_SelectiveFCLayer.cpp rename to paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp index 583e3bc545..1975d9196d 100644 --- a/paddle/gserver/tests/test_SelectiveFCLayer.cpp +++ b/paddle/legacy/gserver/tests/test_SelectiveFCLayer.cpp @@ -14,16 +14,16 @@ limitations under the License. */ #include #include -#include +#include #include #include #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/gserver/layers/FullyConnectedLayer.h" -#include "paddle/gserver/layers/Layer.h" -#include "paddle/gserver/layers/SelectiveFullyConnectedLayer.h" -#include "paddle/math/CpuSparseMatrix.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/FullyConnectedLayer.h" +#include "paddle/legacy/gserver/layers/Layer.h" +#include "paddle/legacy/gserver/layers/SelectiveFullyConnectedLayer.h" +#include "paddle/legacy/math/CpuSparseMatrix.h" using namespace paddle; // NOLINT using namespace std; // NOLINT @@ -76,7 +76,7 @@ void calcOutput(ComData& comData, FLAGS_config = configFile; FLAGS_config_args = configArgs; FLAGS_use_gpu = useGpu; - FLAGS_init_model_path = "gserver/tests/SelectiveFcTest/model"; + FLAGS_init_model_path = "legacy/gserver/tests/SelectiveFcTest/model"; *ThreadLocalRand::getSeed() = 0; srand(0); @@ -311,13 +311,13 @@ LayerPtr initFcLayer(LayerPtr dataLayer, #ifndef PADDLE_TYPE_DOUBLE // The parameter file used in fc.conf and selective_fc.conf is float TEST(Layer, SelectiveFcLayer_train_dense_mul) { - const string& fcConfig = "gserver/tests/SelectiveFcTest/conf/fc.conf"; + const string& fcConfig = "legacy/gserver/tests/SelectiveFcTest/conf/fc.conf"; const string& fcConfigArgs = - "filelist=gserver/tests/SelectiveFcTest/dense_mul_list"; + "filelist=legacy/gserver/tests/SelectiveFcTest/dense_mul_list"; const string& selFcConfig = - "gserver/tests/SelectiveFcTest/conf/selective_fc.conf"; + "legacy/gserver/tests/SelectiveFcTest/conf/selective_fc.conf"; const string& selConfigArgs = - "filelist=gserver/tests/SelectiveFcTest/dense_mul_list"; + "filelist=legacy/gserver/tests/SelectiveFcTest/dense_mul_list"; for (auto useGpu : {false, true}) { #ifndef PADDLE_WITH_CUDA @@ -350,7 +350,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config, creatDataLayer("data", batchSize, dataLayerSize, values, useGpu); const string& selfcParaFile = - "gserver/tests/SelectiveFcTest/model/rand_fc_param.w.transpose"; + "legacy/gserver/tests/SelectiveFcTest/model/rand_fc_param.w.transpose"; const string& selfcParaName = "rand_fc_param.w.transpose"; std::shared_ptr selfcLayer = @@ -396,7 +396,7 @@ void testSelectiveFcLayerTrainSparseMul(const LayerConfig& config, size_t nnz = cpuOutMatSelfc->getElementCnt(); const string& fcParaFile = - "gserver/tests/SelectiveFcTest/model/rand_fc_param.w"; + "legacy/gserver/tests/SelectiveFcTest/model/rand_fc_param.w"; const string& fcParaName = "rand_fc_param.w"; LayerConfig fcLayerConfig; fcLayerConfig.set_name("fc_layer"); diff --git a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp b/paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp similarity index 99% rename from paddle/gserver/tests/test_SeqSliceLayerGrad.cpp rename to paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp index 406ca63b6e..05acd71421 100644 --- a/paddle/gserver/tests/test_SeqSliceLayerGrad.cpp +++ b/paddle/legacy/gserver/tests/test_SeqSliceLayerGrad.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" #include "LayerGradUtil.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/gserver/tests/test_Upsample.cpp b/paddle/legacy/gserver/tests/test_Upsample.cpp similarity index 99% rename from paddle/gserver/tests/test_Upsample.cpp rename to paddle/legacy/gserver/tests/test_Upsample.cpp index 39b902fcc7..940d46baf7 100644 --- a/paddle/gserver/tests/test_Upsample.cpp +++ b/paddle/legacy/gserver/tests/test_Upsample.cpp @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "LayerGradUtil.h" -#include "paddle/math/MathUtils.h" +#include "paddle/legacy/math/MathUtils.h" #include "paddle/testing/TestUtil.h" void setPoolConfig(paddle::TestConfig* config, diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp similarity index 96% rename from paddle/gserver/tests/test_WarpCTCLayer.cpp rename to paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp index f2299d7da2..b1697e1616 100644 --- a/paddle/gserver/tests/test_WarpCTCLayer.cpp +++ b/paddle/legacy/gserver/tests/test_WarpCTCLayer.cpp @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include "ModelConfig.pb.h" -#include "paddle/gserver/layers/CTCLayer.h" -#include "paddle/gserver/layers/DataLayer.h" -#include "paddle/gserver/layers/Layer.h" -#include "paddle/gserver/layers/WarpCTCLayer.h" +#include "paddle/legacy/gserver/layers/CTCLayer.h" +#include "paddle/legacy/gserver/layers/DataLayer.h" +#include "paddle/legacy/gserver/layers/Layer.h" +#include "paddle/legacy/gserver/layers/WarpCTCLayer.h" #include "paddle/testing/TestUtil.h" diff --git a/paddle/math/Allocator.h b/paddle/legacy/math/Allocator.h similarity index 98% rename from paddle/math/Allocator.h rename to paddle/legacy/math/Allocator.h index c43a83891e..ffb5ec1cad 100644 --- a/paddle/math/Allocator.h +++ b/paddle/legacy/math/Allocator.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "hl_gpu.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/BaseMatrix.cu b/paddle/legacy/math/BaseMatrix.cu similarity index 99% rename from paddle/math/BaseMatrix.cu rename to paddle/legacy/math/BaseMatrix.cu index 7b57419e5a..7e7cdc57a9 100644 --- a/paddle/math/BaseMatrix.cu +++ b/paddle/legacy/math/BaseMatrix.cu @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include #include #include "BaseMatrix.h" diff --git a/paddle/math/BaseMatrix.h b/paddle/legacy/math/BaseMatrix.h similarity index 99% rename from paddle/math/BaseMatrix.h rename to paddle/legacy/math/BaseMatrix.h index 1958629aa0..4627f847d3 100644 --- a/paddle/math/BaseMatrix.h +++ b/paddle/legacy/math/BaseMatrix.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include "TensorExpression.h" -#include "paddle/utils/Common.h" +#include "paddle/legacy/utils/Common.h" namespace paddle { diff --git a/paddle/math/CMakeLists.txt b/paddle/legacy/math/CMakeLists.txt similarity index 84% rename from paddle/math/CMakeLists.txt rename to paddle/legacy/math/CMakeLists.txt index 3c897b5f3e..9992ec71f4 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/legacy/math/CMakeLists.txt @@ -37,13 +37,13 @@ if(MOBILE_INFERENCE) ${CMAKE_CURRENT_SOURCE_DIR}/SparseRowMatrix.cpp) endif() set(MATH_SOURCES - "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" - "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" + "${PADDLE_SOURCE_DIR}/paddle/legacy/math/BaseMatrix.cu" + "${PADDLE_SOURCE_DIR}/paddle/legacy/math/TrainingAlgorithmOp.cu" ${MATH_SOURCES}) if(NOT WITH_GPU) # then compile BaseMatrix.cu as c++ file - compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu") - compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/legacy/math/BaseMatrix.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/legacy/math/TrainingAlgorithmOp.cu") add_library(paddle_math STATIC ${MATH_SOURCES}) else() diff --git a/paddle/math/CpuSparseMatrix.cpp b/paddle/legacy/math/CpuSparseMatrix.cpp similarity index 99% rename from paddle/math/CpuSparseMatrix.cpp rename to paddle/legacy/math/CpuSparseMatrix.cpp index 023450ffb7..20c65a3a1d 100644 --- a/paddle/math/CpuSparseMatrix.cpp +++ b/paddle/legacy/math/CpuSparseMatrix.cpp @@ -16,8 +16,8 @@ limitations under the License. */ #include "SparseMatrix.h" #include "float.h" #include "hl_gpu.h" -#include "paddle/math/MathUtils.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/legacy/math/CpuSparseMatrix.h similarity index 100% rename from paddle/math/CpuSparseMatrix.h rename to paddle/legacy/math/CpuSparseMatrix.h diff --git a/paddle/math/ExecViaCpu.h b/paddle/legacy/math/ExecViaCpu.h similarity index 100% rename from paddle/math/ExecViaCpu.h rename to paddle/legacy/math/ExecViaCpu.h diff --git a/paddle/math/MKLDNNMatrix.cpp b/paddle/legacy/math/MKLDNNMatrix.cpp similarity index 100% rename from paddle/math/MKLDNNMatrix.cpp rename to paddle/legacy/math/MKLDNNMatrix.cpp diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/legacy/math/MKLDNNMatrix.h similarity index 99% rename from paddle/math/MKLDNNMatrix.h rename to paddle/legacy/math/MKLDNNMatrix.h index d4a78f3e54..5a0e5f8592 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/legacy/math/MKLDNNMatrix.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include "Matrix.h" #include "mkldnn.hpp" -#include "paddle/parameter/Parameter.h" +#include "paddle/legacy/parameter/Parameter.h" namespace paddle { diff --git a/paddle/math/MathFunctions.cpp b/paddle/legacy/math/MathFunctions.cpp similarity index 99% rename from paddle/math/MathFunctions.cpp rename to paddle/legacy/math/MathFunctions.cpp index f48119aa51..bbf34a32f3 100644 --- a/paddle/math/MathFunctions.cpp +++ b/paddle/legacy/math/MathFunctions.cpp @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/math/MathFunctions.h" +#include "paddle/legacy/math/MathFunctions.h" #include "hl_matrix_apply.cuh" #include "hl_matrix_ops.cuh" -#include "paddle/utils/DynamicLoader.h" +#include "paddle/legacy/utils/DynamicLoader.h" namespace dynload { diff --git a/paddle/math/MathFunctions.h b/paddle/legacy/math/MathFunctions.h similarity index 100% rename from paddle/math/MathFunctions.h rename to paddle/legacy/math/MathFunctions.h diff --git a/paddle/math/MathUtils.cpp b/paddle/legacy/math/MathUtils.cpp similarity index 98% rename from paddle/math/MathUtils.cpp rename to paddle/legacy/math/MathUtils.cpp index b2afdbcd51..47ac9c187c 100644 --- a/paddle/math/MathUtils.cpp +++ b/paddle/legacy/math/MathUtils.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "MathUtils.h" #include #include "Vector.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/MathUtils.h b/paddle/legacy/math/MathUtils.h similarity index 100% rename from paddle/math/MathUtils.h rename to paddle/legacy/math/MathUtils.h diff --git a/paddle/math/Matrix.cpp b/paddle/legacy/math/Matrix.cpp similarity index 99% rename from paddle/math/Matrix.cpp rename to paddle/legacy/math/Matrix.cpp index bcd6dfe1fd..e53f95006c 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/legacy/math/Matrix.cpp @@ -26,11 +26,11 @@ limitations under the License. */ #include "hl_gpu.h" #include "hl_table_apply.h" #include "hl_top_k.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "NEONFunctions.h" -#include "paddle/function/GemmFunctor.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/function/GemmFunctor.h" +#include "paddle/legacy/utils/ThreadLocal.h" #include "SIMDFunctions.h" diff --git a/paddle/math/Matrix.h b/paddle/legacy/math/Matrix.h similarity index 99% rename from paddle/math/Matrix.h rename to paddle/legacy/math/Matrix.h index 4c3b2c9536..ff4f4cfc2a 100644 --- a/paddle/math/Matrix.h +++ b/paddle/legacy/math/Matrix.h @@ -18,20 +18,20 @@ limitations under the License. */ #include #include -#include "paddle/utils/Logging.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/ThreadLocal.h" #include #include "BaseMatrix.h" #include "MemoryHandle.h" #include "Vector.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { -/// TODO(tianbing), move to paddle/function/TensorType.h +/// TODO(tianbing), move to paddle/legacy/function/TensorType.h enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; /** @@ -57,7 +57,7 @@ enum SparseValueType { NO_VALUE = 0, FLOAT_VALUE = 1 }; * value [1, 1, 2, 2, 5] * @endcode */ -/// TODO(tianbing), move to paddle/function/TensorType.h +/// TODO(tianbing), move to paddle/legacy/function/TensorType.h enum SparseFormat { SPARSE_CSR = 0, SPARSE_CSC = 1 }; class Matrix; diff --git a/paddle/math/MatrixBitCode.cpp b/paddle/legacy/math/MatrixBitCode.cpp similarity index 99% rename from paddle/math/MatrixBitCode.cpp rename to paddle/legacy/math/MatrixBitCode.cpp index f7a949294b..f35f266a30 100644 --- a/paddle/math/MatrixBitCode.cpp +++ b/paddle/legacy/math/MatrixBitCode.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "Matrix.h" #include "hl_gpu.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/MemoryHandle.cpp b/paddle/legacy/math/MemoryHandle.cpp similarity index 100% rename from paddle/math/MemoryHandle.cpp rename to paddle/legacy/math/MemoryHandle.cpp diff --git a/paddle/math/MemoryHandle.h b/paddle/legacy/math/MemoryHandle.h similarity index 100% rename from paddle/math/MemoryHandle.h rename to paddle/legacy/math/MemoryHandle.h diff --git a/paddle/math/NEONFunctions.cpp b/paddle/legacy/math/NEONFunctions.cpp similarity index 100% rename from paddle/math/NEONFunctions.cpp rename to paddle/legacy/math/NEONFunctions.cpp diff --git a/paddle/math/NEONFunctions.h b/paddle/legacy/math/NEONFunctions.h similarity index 100% rename from paddle/math/NEONFunctions.h rename to paddle/legacy/math/NEONFunctions.h diff --git a/paddle/math/PoolAllocator.cpp b/paddle/legacy/math/PoolAllocator.cpp similarity index 100% rename from paddle/math/PoolAllocator.cpp rename to paddle/legacy/math/PoolAllocator.cpp diff --git a/paddle/math/PoolAllocator.h b/paddle/legacy/math/PoolAllocator.h similarity index 100% rename from paddle/math/PoolAllocator.h rename to paddle/legacy/math/PoolAllocator.h diff --git a/paddle/math/RowBuffer.h b/paddle/legacy/math/RowBuffer.h similarity index 99% rename from paddle/math/RowBuffer.h rename to paddle/legacy/math/RowBuffer.h index 6950afaa21..9dfd5eff06 100644 --- a/paddle/math/RowBuffer.h +++ b/paddle/legacy/math/RowBuffer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include #include "MemoryHandle.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/SIMDFunctions.cpp b/paddle/legacy/math/SIMDFunctions.cpp similarity index 100% rename from paddle/math/SIMDFunctions.cpp rename to paddle/legacy/math/SIMDFunctions.cpp diff --git a/paddle/math/SIMDFunctions.h b/paddle/legacy/math/SIMDFunctions.h similarity index 100% rename from paddle/math/SIMDFunctions.h rename to paddle/legacy/math/SIMDFunctions.h diff --git a/paddle/math/SparseMatrix.cpp b/paddle/legacy/math/SparseMatrix.cpp similarity index 99% rename from paddle/math/SparseMatrix.cpp rename to paddle/legacy/math/SparseMatrix.cpp index 1faa343dbc..6f68252b0a 100644 --- a/paddle/math/SparseMatrix.cpp +++ b/paddle/legacy/math/SparseMatrix.cpp @@ -18,7 +18,7 @@ limitations under the License. */ #include #include "hl_gpu.h" #include "hl_top_k.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/SparseMatrix.h b/paddle/legacy/math/SparseMatrix.h similarity index 100% rename from paddle/math/SparseMatrix.h rename to paddle/legacy/math/SparseMatrix.h diff --git a/paddle/math/SparseRowMatrix.cpp b/paddle/legacy/math/SparseRowMatrix.cpp similarity index 98% rename from paddle/math/SparseRowMatrix.cpp rename to paddle/legacy/math/SparseRowMatrix.cpp index 4254175aab..39bcdf2298 100644 --- a/paddle/math/SparseRowMatrix.cpp +++ b/paddle/legacy/math/SparseRowMatrix.cpp @@ -17,12 +17,12 @@ limitations under the License. */ #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "SIMDFunctions.h" -#include "paddle/utils/Thread.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Thread.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/SparseRowMatrix.h b/paddle/legacy/math/SparseRowMatrix.h similarity index 99% rename from paddle/math/SparseRowMatrix.h rename to paddle/legacy/math/SparseRowMatrix.h index cf6779e8b0..e206747a41 100644 --- a/paddle/math/SparseRowMatrix.h +++ b/paddle/legacy/math/SparseRowMatrix.h @@ -21,7 +21,7 @@ limitations under the License. */ #include #include "Matrix.h" #include "RowBuffer.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/math/Storage.cpp b/paddle/legacy/math/Storage.cpp similarity index 97% rename from paddle/math/Storage.cpp rename to paddle/legacy/math/Storage.cpp index 5982bf2e56..65d53aeaa9 100644 --- a/paddle/math/Storage.cpp +++ b/paddle/legacy/math/Storage.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "Storage.h" #include "Allocator.h" -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/StringUtil.h" +#include "paddle/legacy/utils/Util.h" #ifndef PADDLE_MOBILE_INFERENCE DEFINE_int32(pool_limit_size, diff --git a/paddle/math/Storage.h b/paddle/legacy/math/Storage.h similarity index 96% rename from paddle/math/Storage.h rename to paddle/legacy/math/Storage.h index 61a9aa2a07..bd22dde2c8 100644 --- a/paddle/math/Storage.h +++ b/paddle/legacy/math/Storage.h @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include "PoolAllocator.h" -#include "paddle/utils/Locks.h" +#include "paddle/legacy/utils/Locks.h" namespace paddle { diff --git a/paddle/math/TensorApply.h b/paddle/legacy/math/TensorApply.h similarity index 100% rename from paddle/math/TensorApply.h rename to paddle/legacy/math/TensorApply.h diff --git a/paddle/math/TensorAssign.h b/paddle/legacy/math/TensorAssign.h similarity index 99% rename from paddle/math/TensorAssign.h rename to paddle/legacy/math/TensorAssign.h index 7d4726ddba..efbfce6c4f 100644 --- a/paddle/math/TensorAssign.h +++ b/paddle/legacy/math/TensorAssign.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/TensorEvaluate.h b/paddle/legacy/math/TensorEvaluate.h similarity index 98% rename from paddle/math/TensorEvaluate.h rename to paddle/legacy/math/TensorEvaluate.h index 2a722016e7..3029dd35fb 100644 --- a/paddle/math/TensorEvaluate.h +++ b/paddle/legacy/math/TensorEvaluate.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "hl_base.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/TensorExpression.h b/paddle/legacy/math/TensorExpression.h similarity index 99% rename from paddle/math/TensorExpression.h rename to paddle/legacy/math/TensorExpression.h index f6da9adfca..1c6cf07831 100644 --- a/paddle/math/TensorExpression.h +++ b/paddle/legacy/math/TensorExpression.h @@ -16,8 +16,8 @@ limitations under the License. */ #include #include #include "hl_tensor_ops.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/TrainingAlgorithmOp.cu b/paddle/legacy/math/TrainingAlgorithmOp.cu similarity index 99% rename from paddle/math/TrainingAlgorithmOp.cu rename to paddle/legacy/math/TrainingAlgorithmOp.cu index b844768d3b..9e1eaa0f45 100644 --- a/paddle/math/TrainingAlgorithmOp.cu +++ b/paddle/legacy/math/TrainingAlgorithmOp.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include "BaseMatrix.h" #include "TrainingAlgorithmOp.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #if __cplusplus > 199711L diff --git a/paddle/math/TrainingAlgorithmOp.h b/paddle/legacy/math/TrainingAlgorithmOp.h similarity index 99% rename from paddle/math/TrainingAlgorithmOp.h rename to paddle/legacy/math/TrainingAlgorithmOp.h index fe40fc2d36..921c2742cf 100644 --- a/paddle/math/TrainingAlgorithmOp.h +++ b/paddle/legacy/math/TrainingAlgorithmOp.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "BaseMatrix.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/math/Vector.cpp b/paddle/legacy/math/Vector.cpp similarity index 99% rename from paddle/math/Vector.cpp rename to paddle/legacy/math/Vector.cpp index 2a47ed7ef8..87f48bb162 100644 --- a/paddle/math/Vector.cpp +++ b/paddle/legacy/math/Vector.cpp @@ -13,17 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Vector.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "Matrix.h" #include "hl_gpu.h" #include "hl_matrix.h" #include "hl_table_apply.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Thread.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Thread.h" +#include "paddle/legacy/utils/ThreadLocal.h" namespace paddle { diff --git a/paddle/math/Vector.h b/paddle/legacy/math/Vector.h similarity index 99% rename from paddle/math/Vector.h rename to paddle/legacy/math/Vector.h index 964b42cae5..63cb4651c5 100644 --- a/paddle/math/Vector.h +++ b/paddle/legacy/math/Vector.h @@ -21,8 +21,8 @@ limitations under the License. */ #include "BaseMatrix.h" #include "MemoryHandle.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Thread.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Thread.h" namespace paddle { diff --git a/paddle/math/tests/CMakeLists.txt b/paddle/legacy/math/tests/CMakeLists.txt similarity index 100% rename from paddle/math/tests/CMakeLists.txt rename to paddle/legacy/math/tests/CMakeLists.txt diff --git a/paddle/math/tests/OriginalOptimizerApi.h b/paddle/legacy/math/tests/OriginalOptimizerApi.h similarity index 98% rename from paddle/math/tests/OriginalOptimizerApi.h rename to paddle/legacy/math/tests/OriginalOptimizerApi.h index e30d784b23..f386e19958 100644 --- a/paddle/math/tests/OriginalOptimizerApi.h +++ b/paddle/legacy/math/tests/OriginalOptimizerApi.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include "paddle/math/Vector.h" -#include "paddle/utils/GlobalConstants.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/GlobalConstants.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/PerfUtils.h b/paddle/legacy/math/tests/PerfUtils.h similarity index 98% rename from paddle/math/tests/PerfUtils.h rename to paddle/legacy/math/tests/PerfUtils.h index bee2351e2f..eaf4869e4c 100644 --- a/paddle/math/tests/PerfUtils.h +++ b/paddle/legacy/math/tests/PerfUtils.h @@ -21,7 +21,7 @@ limitations under the License. */ #else -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" using namespace paddle; // NOLINT #define EXPRESSION_PERFORMANCE(expression) \ diff --git a/paddle/math/tests/TensorCheck.h b/paddle/legacy/math/tests/TensorCheck.h similarity index 99% rename from paddle/math/tests/TensorCheck.h rename to paddle/legacy/math/tests/TensorCheck.h index 40ac04ef5d..41c8ece282 100644 --- a/paddle/math/tests/TensorCheck.h +++ b/paddle/legacy/math/tests/TensorCheck.h @@ -20,7 +20,7 @@ limitations under the License. */ */ #include -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace autotest { diff --git a/paddle/math/tests/TestUtils.h b/paddle/legacy/math/tests/TestUtils.h similarity index 98% rename from paddle/math/tests/TestUtils.h rename to paddle/legacy/math/tests/TestUtils.h index e1966ec8a7..60e76359da 100644 --- a/paddle/math/tests/TestUtils.h +++ b/paddle/legacy/math/tests/TestUtils.h @@ -41,8 +41,8 @@ limitations under the License. */ #include #include "TensorCheck.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" namespace autotest { diff --git a/paddle/math/tests/test_Allocator.cpp b/paddle/legacy/math/tests/test_Allocator.cpp similarity index 93% rename from paddle/math/tests/test_Allocator.cpp rename to paddle/legacy/math/tests/test_Allocator.cpp index 84bc1c1d9e..122be9082a 100644 --- a/paddle/math/tests/test_Allocator.cpp +++ b/paddle/legacy/math/tests/test_Allocator.cpp @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" #define private public -#include "paddle/math/Allocator.h" -#include "paddle/math/MemoryHandle.h" -#include "paddle/math/PoolAllocator.h" +#include "paddle/legacy/math/Allocator.h" +#include "paddle/legacy/math/MemoryHandle.h" +#include "paddle/legacy/math/PoolAllocator.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/test_BaseMatrix.cpp b/paddle/legacy/math/tests/test_BaseMatrix.cpp similarity index 99% rename from paddle/math/tests/test_BaseMatrix.cpp rename to paddle/legacy/math/tests/test_BaseMatrix.cpp index 6f7beb60c8..488765c6ac 100644 --- a/paddle/math/tests/test_BaseMatrix.cpp +++ b/paddle/legacy/math/tests/test_BaseMatrix.cpp @@ -21,7 +21,7 @@ limitations under the License. */ #include #include "TestUtils.h" -#include "paddle/math/BaseMatrix.h" +#include "paddle/legacy/math/BaseMatrix.h" using paddle::BaseMatrix; using paddle::Matrix; diff --git a/paddle/math/tests/test_CpuGpuVector.cpp b/paddle/legacy/math/tests/test_CpuGpuVector.cpp similarity index 97% rename from paddle/math/tests/test_CpuGpuVector.cpp rename to paddle/legacy/math/tests/test_CpuGpuVector.cpp index 395541a76a..010fef534d 100644 --- a/paddle/math/tests/test_CpuGpuVector.cpp +++ b/paddle/legacy/math/tests/test_CpuGpuVector.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include -#include "paddle/math/Vector.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Util.h" #include "test_matrixUtil.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/test_ExecViaCpu.cpp b/paddle/legacy/math/tests/test_ExecViaCpu.cpp similarity index 96% rename from paddle/math/tests/test_ExecViaCpu.cpp rename to paddle/legacy/math/tests/test_ExecViaCpu.cpp index 72256cb9d4..b2ce0bc7ed 100644 --- a/paddle/math/tests/test_ExecViaCpu.cpp +++ b/paddle/legacy/math/tests/test_ExecViaCpu.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include +#include +#include #include -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/test_FPException.cpp b/paddle/legacy/math/tests/test_FPException.cpp similarity index 97% rename from paddle/math/tests/test_FPException.cpp rename to paddle/legacy/math/tests/test_FPException.cpp index d87fdcda9e..aa6aea71c8 100644 --- a/paddle/math/tests/test_FPException.cpp +++ b/paddle/legacy/math/tests/test_FPException.cpp @@ -30,8 +30,8 @@ limitations under the License. */ */ #include -#include "paddle/math/Matrix.h" -#include "paddle/utils/Common.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Common.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/test_GpuProfiler.cpp b/paddle/legacy/math/tests/test_GpuProfiler.cpp similarity index 97% rename from paddle/math/tests/test_GpuProfiler.cpp rename to paddle/legacy/math/tests/test_GpuProfiler.cpp index 828159660b..ee27109f21 100644 --- a/paddle/math/tests/test_GpuProfiler.cpp +++ b/paddle/legacy/math/tests/test_GpuProfiler.cpp @@ -15,11 +15,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "paddle/testing/TestUtil.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/legacy/math/tests/test_Matrix.cpp similarity index 100% rename from paddle/math/tests/test_Matrix.cpp rename to paddle/legacy/math/tests/test_Matrix.cpp diff --git a/paddle/math/tests/test_RowBuffer.cpp b/paddle/legacy/math/tests/test_RowBuffer.cpp similarity index 98% rename from paddle/math/tests/test_RowBuffer.cpp rename to paddle/legacy/math/tests/test_RowBuffer.cpp index e38de853e0..2ef8cd303d 100644 --- a/paddle/math/tests/test_RowBuffer.cpp +++ b/paddle/legacy/math/tests/test_RowBuffer.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/math/RowBuffer.h" +#include "paddle/legacy/math/RowBuffer.h" TEST(RowBuffer, testAutoGrow) { paddle::RowBuffer buf(128); diff --git a/paddle/math/tests/test_SIMDFunctions.cpp b/paddle/legacy/math/tests/test_SIMDFunctions.cpp similarity index 98% rename from paddle/math/tests/test_SIMDFunctions.cpp rename to paddle/legacy/math/tests/test_SIMDFunctions.cpp index b692679436..c6490f70e3 100644 --- a/paddle/math/tests/test_SIMDFunctions.cpp +++ b/paddle/legacy/math/tests/test_SIMDFunctions.cpp @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/math/SIMDFunctions.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/SIMDFunctions.h" +#include "paddle/legacy/utils/Util.h" #include diff --git a/paddle/math/tests/test_SparseMatrix.cpp b/paddle/legacy/math/tests/test_SparseMatrix.cpp similarity index 99% rename from paddle/math/tests/test_SparseMatrix.cpp rename to paddle/legacy/math/tests/test_SparseMatrix.cpp index dbcbeb8d50..30896a945e 100644 --- a/paddle/math/tests/test_SparseMatrix.cpp +++ b/paddle/legacy/math/tests/test_SparseMatrix.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include #include "test_matrixUtil.h" diff --git a/paddle/math/tests/test_Tensor.cu b/paddle/legacy/math/tests/test_Tensor.cu similarity index 99% rename from paddle/math/tests/test_Tensor.cu rename to paddle/legacy/math/tests/test_Tensor.cu index acb2da86d0..3ce056d661 100644 --- a/paddle/math/tests/test_Tensor.cu +++ b/paddle/legacy/math/tests/test_Tensor.cu @@ -14,7 +14,7 @@ limitations under the License. */ #include #include "TensorCheck.h" -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" using paddle::Matrix; using paddle::CpuMatrix; diff --git a/paddle/math/tests/test_TrainingAlgorithm.cpp b/paddle/legacy/math/tests/test_TrainingAlgorithm.cpp similarity index 99% rename from paddle/math/tests/test_TrainingAlgorithm.cpp rename to paddle/legacy/math/tests/test_TrainingAlgorithm.cpp index fb58d26734..214ae8971a 100644 --- a/paddle/math/tests/test_TrainingAlgorithm.cpp +++ b/paddle/legacy/math/tests/test_TrainingAlgorithm.cpp @@ -16,8 +16,8 @@ limitations under the License. */ #include "OriginalOptimizerApi.h" #include "PerfUtils.h" #include "TensorCheck.h" -#include "paddle/math/TrainingAlgorithmOp.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/TrainingAlgorithmOp.h" +#include "paddle/legacy/utils/Util.h" using namespace paddle; // NOLINT diff --git a/paddle/math/tests/test_batchTranspose.cpp b/paddle/legacy/math/tests/test_batchTranspose.cpp similarity index 100% rename from paddle/math/tests/test_batchTranspose.cpp rename to paddle/legacy/math/tests/test_batchTranspose.cpp diff --git a/paddle/math/tests/test_lazyAssign.cu b/paddle/legacy/math/tests/test_lazyAssign.cu similarity index 97% rename from paddle/math/tests/test_lazyAssign.cu rename to paddle/legacy/math/tests/test_lazyAssign.cu index cbd74bbfe3..cf8c3d7719 100644 --- a/paddle/math/tests/test_lazyAssign.cu +++ b/paddle/legacy/math/tests/test_lazyAssign.cu @@ -15,8 +15,8 @@ limitations under the License. */ #include #include "PerfUtils.h" #include "TensorCheck.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/TensorAssign.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/TensorAssign.h" using paddle::BaseMatrix; using paddle::CpuMatrix; diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/legacy/math/tests/test_matrixCompare.cpp similarity index 99% rename from paddle/math/tests/test_matrixCompare.cpp rename to paddle/legacy/math/tests/test_matrixCompare.cpp index e45ddd433f..a43adde46f 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/legacy/math/tests/test_matrixCompare.cpp @@ -18,13 +18,13 @@ limitations under the License. */ #include #include "TensorCheck.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseMatrix.h" +#include "paddle/legacy/utils/DynamicLoader.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "paddle/testing/TestUtil.h" -#include "paddle/utils/DynamicLoader.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/math/tests/test_matrixUtil.h b/paddle/legacy/math/tests/test_matrixUtil.h similarity index 98% rename from paddle/math/tests/test_matrixUtil.h rename to paddle/legacy/math/tests/test_matrixUtil.h index 86297547dc..58c93f746e 100644 --- a/paddle/math/tests/test_matrixUtil.h +++ b/paddle/legacy/math/tests/test_matrixUtil.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include -#include -#include "paddle/math/SparseMatrix.h" +#include +#include "paddle/legacy/math/SparseMatrix.h" namespace paddle { diff --git a/paddle/math/tests/test_perturbation.cpp b/paddle/legacy/math/tests/test_perturbation.cpp similarity index 100% rename from paddle/math/tests/test_perturbation.cpp rename to paddle/legacy/math/tests/test_perturbation.cpp diff --git a/paddle/math/tests/test_sparseMatrixCompare.cpp b/paddle/legacy/math/tests/test_sparseMatrixCompare.cpp similarity index 98% rename from paddle/math/tests/test_sparseMatrixCompare.cpp rename to paddle/legacy/math/tests/test_sparseMatrixCompare.cpp index 12647d21a2..492aa0a689 100644 --- a/paddle/math/tests/test_sparseMatrixCompare.cpp +++ b/paddle/legacy/math/tests/test_sparseMatrixCompare.cpp @@ -18,8 +18,8 @@ limitations under the License. */ /// only cpu version. #include -#include "paddle/math/Matrix.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/utils/Util.h" #include "test_matrixUtil.h" using namespace paddle; // NOLINT diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/legacy/optimizer/CMakeLists.txt similarity index 100% rename from paddle/optimizer/CMakeLists.txt rename to paddle/legacy/optimizer/CMakeLists.txt diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/legacy/optimizer/adadelta_optimizer.cc similarity index 100% rename from paddle/optimizer/adadelta_optimizer.cc rename to paddle/legacy/optimizer/adadelta_optimizer.cc diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/legacy/optimizer/adadelta_optimizer.h similarity index 100% rename from paddle/optimizer/adadelta_optimizer.h rename to paddle/legacy/optimizer/adadelta_optimizer.h diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/legacy/optimizer/adagrad_optimizer.cc similarity index 100% rename from paddle/optimizer/adagrad_optimizer.cc rename to paddle/legacy/optimizer/adagrad_optimizer.cc diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/legacy/optimizer/adagrad_optimizer.h similarity index 100% rename from paddle/optimizer/adagrad_optimizer.h rename to paddle/legacy/optimizer/adagrad_optimizer.h diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/legacy/optimizer/adam_optimizer.cc similarity index 100% rename from paddle/optimizer/adam_optimizer.cc rename to paddle/legacy/optimizer/adam_optimizer.cc diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/legacy/optimizer/adam_optimizer.h similarity index 100% rename from paddle/optimizer/adam_optimizer.h rename to paddle/legacy/optimizer/adam_optimizer.h diff --git a/paddle/optimizer/lr_policy.h b/paddle/legacy/optimizer/lr_policy.h similarity index 100% rename from paddle/optimizer/lr_policy.h rename to paddle/legacy/optimizer/lr_policy.h diff --git a/paddle/optimizer/optimizer.cc b/paddle/legacy/optimizer/optimizer.cc similarity index 100% rename from paddle/optimizer/optimizer.cc rename to paddle/legacy/optimizer/optimizer.cc diff --git a/paddle/optimizer/optimizer.h b/paddle/legacy/optimizer/optimizer.h similarity index 100% rename from paddle/optimizer/optimizer.h rename to paddle/legacy/optimizer/optimizer.h diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/legacy/optimizer/parameter_optimizer.cc similarity index 100% rename from paddle/optimizer/parameter_optimizer.cc rename to paddle/legacy/optimizer/parameter_optimizer.cc diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/legacy/optimizer/parameter_optimizer.h similarity index 100% rename from paddle/optimizer/parameter_optimizer.h rename to paddle/legacy/optimizer/parameter_optimizer.h diff --git a/paddle/optimizer/parameter_optimizer_test.cc b/paddle/legacy/optimizer/parameter_optimizer_test.cc similarity index 100% rename from paddle/optimizer/parameter_optimizer_test.cc rename to paddle/legacy/optimizer/parameter_optimizer_test.cc diff --git a/paddle/optimizer/serialization.h b/paddle/legacy/optimizer/serialization.h similarity index 97% rename from paddle/optimizer/serialization.h rename to paddle/legacy/optimizer/serialization.h index bf12eed15f..2067a8d8cf 100644 --- a/paddle/optimizer/serialization.h +++ b/paddle/legacy/optimizer/serialization.h @@ -19,7 +19,7 @@ #include #include #include "OptimizerConfig.pb.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include "tensor.h" namespace paddle { diff --git a/paddle/optimizer/serialization_test.cc b/paddle/legacy/optimizer/serialization_test.cc similarity index 100% rename from paddle/optimizer/serialization_test.cc rename to paddle/legacy/optimizer/serialization_test.cc diff --git a/paddle/optimizer/sgd_optimizer.cc b/paddle/legacy/optimizer/sgd_optimizer.cc similarity index 100% rename from paddle/optimizer/sgd_optimizer.cc rename to paddle/legacy/optimizer/sgd_optimizer.cc diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/legacy/optimizer/sgd_optimizer.h similarity index 100% rename from paddle/optimizer/sgd_optimizer.h rename to paddle/legacy/optimizer/sgd_optimizer.h diff --git a/paddle/optimizer/tensor.h b/paddle/legacy/optimizer/tensor.h similarity index 95% rename from paddle/optimizer/tensor.h rename to paddle/legacy/optimizer/tensor.h index d2cef99074..2e58577d4d 100644 --- a/paddle/optimizer/tensor.h +++ b/paddle/legacy/optimizer/tensor.h @@ -18,8 +18,8 @@ #include #include -#include "paddle/utils/Common.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { namespace optimizer { diff --git a/paddle/parameter/Argument.cpp b/paddle/legacy/parameter/Argument.cpp similarity index 99% rename from paddle/parameter/Argument.cpp rename to paddle/legacy/parameter/Argument.cpp index 94522f718a..3f1d599e90 100644 --- a/paddle/parameter/Argument.cpp +++ b/paddle/legacy/parameter/Argument.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Argument.h" -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" #include diff --git a/paddle/parameter/Argument.h b/paddle/legacy/parameter/Argument.h similarity index 98% rename from paddle/parameter/Argument.h rename to paddle/legacy/parameter/Argument.h index e580d38216..ea8634896c 100644 --- a/paddle/parameter/Argument.h +++ b/paddle/legacy/parameter/Argument.h @@ -13,11 +13,11 @@ limitations under the License. */ #include "hl_gpu.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/parameter/AverageOptimizer.cpp b/paddle/legacy/parameter/AverageOptimizer.cpp similarity index 100% rename from paddle/parameter/AverageOptimizer.cpp rename to paddle/legacy/parameter/AverageOptimizer.cpp diff --git a/paddle/parameter/AverageOptimizer.h b/paddle/legacy/parameter/AverageOptimizer.h similarity index 100% rename from paddle/parameter/AverageOptimizer.h rename to paddle/legacy/parameter/AverageOptimizer.h diff --git a/paddle/parameter/CMakeLists.txt b/paddle/legacy/parameter/CMakeLists.txt similarity index 100% rename from paddle/parameter/CMakeLists.txt rename to paddle/legacy/parameter/CMakeLists.txt diff --git a/paddle/parameter/FirstOrderOptimizer.cpp b/paddle/legacy/parameter/FirstOrderOptimizer.cpp similarity index 98% rename from paddle/parameter/FirstOrderOptimizer.cpp rename to paddle/legacy/parameter/FirstOrderOptimizer.cpp index 182e833405..4f82a115f7 100644 --- a/paddle/parameter/FirstOrderOptimizer.cpp +++ b/paddle/legacy/parameter/FirstOrderOptimizer.cpp @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "FirstOrderOptimizer.h" -#include "paddle/math/TrainingAlgorithmOp.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/TrainingAlgorithmOp.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Util.h" #include diff --git a/paddle/parameter/FirstOrderOptimizer.h b/paddle/legacy/parameter/FirstOrderOptimizer.h similarity index 100% rename from paddle/parameter/FirstOrderOptimizer.h rename to paddle/legacy/parameter/FirstOrderOptimizer.h diff --git a/paddle/parameter/LearningRateScheduler.cpp b/paddle/legacy/parameter/LearningRateScheduler.cpp similarity index 99% rename from paddle/parameter/LearningRateScheduler.cpp rename to paddle/legacy/parameter/LearningRateScheduler.cpp index d57d2189a4..68c44a7ec4 100644 --- a/paddle/parameter/LearningRateScheduler.cpp +++ b/paddle/legacy/parameter/LearningRateScheduler.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "LearningRateScheduler.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/utils/StringUtil.h" namespace paddle { diff --git a/paddle/parameter/LearningRateScheduler.h b/paddle/legacy/parameter/LearningRateScheduler.h similarity index 96% rename from paddle/parameter/LearningRateScheduler.h rename to paddle/legacy/parameter/LearningRateScheduler.h index 3fad970402..fc7e380a6a 100644 --- a/paddle/parameter/LearningRateScheduler.h +++ b/paddle/legacy/parameter/LearningRateScheduler.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include "TrainerConfig.pb.h" -#include "paddle/utils/ClassRegistrar.h" +#include "paddle/legacy/utils/ClassRegistrar.h" namespace paddle { // NOLINTNEXTLINES_4 diff --git a/paddle/parameter/OptimizerFunctions.cpp b/paddle/legacy/parameter/OptimizerFunctions.cpp similarity index 100% rename from paddle/parameter/OptimizerFunctions.cpp rename to paddle/legacy/parameter/OptimizerFunctions.cpp diff --git a/paddle/parameter/OptimizerFunctions.h b/paddle/legacy/parameter/OptimizerFunctions.h similarity index 100% rename from paddle/parameter/OptimizerFunctions.h rename to paddle/legacy/parameter/OptimizerFunctions.h diff --git a/paddle/parameter/OptimizerWithRegularizer.cpp b/paddle/legacy/parameter/OptimizerWithRegularizer.cpp similarity index 100% rename from paddle/parameter/OptimizerWithRegularizer.cpp rename to paddle/legacy/parameter/OptimizerWithRegularizer.cpp diff --git a/paddle/parameter/OptimizerWithRegularizer.h b/paddle/legacy/parameter/OptimizerWithRegularizer.h similarity index 100% rename from paddle/parameter/OptimizerWithRegularizer.h rename to paddle/legacy/parameter/OptimizerWithRegularizer.h diff --git a/paddle/parameter/Parameter.cpp b/paddle/legacy/parameter/Parameter.cpp similarity index 98% rename from paddle/parameter/Parameter.cpp rename to paddle/legacy/parameter/Parameter.cpp index 0e6ea90f3d..666d808f0c 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/legacy/parameter/Parameter.cpp @@ -22,10 +22,10 @@ limitations under the License. */ #include "ParameterUpdateFunctions.h" #include "ThreadLocalBuffer.h" #include "hl_gpu.h" -#include "paddle/math/CpuSparseMatrix.h" -#include "paddle/math/MathUtils.h" -#include "paddle/math/SparseRowMatrix.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/CpuSparseMatrix.h" +#include "paddle/legacy/math/MathUtils.h" +#include "paddle/legacy/math/SparseRowMatrix.h" +#include "paddle/legacy/utils/Logging.h" DEFINE_int32(enable_grad_share, (100 * 1024 * 1024), diff --git a/paddle/parameter/Parameter.h b/paddle/legacy/parameter/Parameter.h similarity index 97% rename from paddle/parameter/Parameter.h rename to paddle/legacy/parameter/Parameter.h index ef519bf35a..43b567dad0 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/legacy/parameter/Parameter.h @@ -24,13 +24,13 @@ limitations under the License. */ #include "TrainerConfig.pb.h" #include "ParameterUpdaterHook.h" -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/ThreadLocal.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/ThreadLocal.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/parameter/ParameterOptimizer.cpp b/paddle/legacy/parameter/ParameterOptimizer.cpp similarity index 98% rename from paddle/parameter/ParameterOptimizer.cpp rename to paddle/legacy/parameter/ParameterOptimizer.cpp index 638daa58f1..b9dffa5afb 100644 --- a/paddle/parameter/ParameterOptimizer.cpp +++ b/paddle/legacy/parameter/ParameterOptimizer.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include diff --git a/paddle/parameter/ParameterOptimizer.h b/paddle/legacy/parameter/ParameterOptimizer.h similarity index 100% rename from paddle/parameter/ParameterOptimizer.h rename to paddle/legacy/parameter/ParameterOptimizer.h diff --git a/paddle/parameter/ParameterUpdateFunctions.cpp b/paddle/legacy/parameter/ParameterUpdateFunctions.cpp similarity index 99% rename from paddle/parameter/ParameterUpdateFunctions.cpp rename to paddle/legacy/parameter/ParameterUpdateFunctions.cpp index db1153c2d6..72c9841acf 100644 --- a/paddle/parameter/ParameterUpdateFunctions.cpp +++ b/paddle/legacy/parameter/ParameterUpdateFunctions.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #ifdef __AVX__ #include #include diff --git a/paddle/parameter/ParameterUpdateFunctions.h b/paddle/legacy/parameter/ParameterUpdateFunctions.h similarity index 95% rename from paddle/parameter/ParameterUpdateFunctions.h rename to paddle/legacy/parameter/ParameterUpdateFunctions.h index 7434baa2d3..a7cc1c4c47 100644 --- a/paddle/parameter/ParameterUpdateFunctions.h +++ b/paddle/legacy/parameter/ParameterUpdateFunctions.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include "paddle/math/Vector.h" -#include "paddle/utils/Common.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Common.h" namespace paddle { diff --git a/paddle/parameter/ParameterUpdaterBase.cpp b/paddle/legacy/parameter/ParameterUpdaterBase.cpp similarity index 96% rename from paddle/parameter/ParameterUpdaterBase.cpp rename to paddle/legacy/parameter/ParameterUpdaterBase.cpp index 7815856b45..7d9d3fad63 100644 --- a/paddle/parameter/ParameterUpdaterBase.cpp +++ b/paddle/legacy/parameter/ParameterUpdaterBase.cpp @@ -15,7 +15,7 @@ limitations under the License. */ #include "ParameterUpdaterBase.h" #include #include "hl_gpu.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/parameter/ParameterUpdaterBase.h b/paddle/legacy/parameter/ParameterUpdaterBase.h similarity index 100% rename from paddle/parameter/ParameterUpdaterBase.h rename to paddle/legacy/parameter/ParameterUpdaterBase.h diff --git a/paddle/parameter/ParameterUpdaterHook.cpp b/paddle/legacy/parameter/ParameterUpdaterHook.cpp similarity index 96% rename from paddle/parameter/ParameterUpdaterHook.cpp rename to paddle/legacy/parameter/ParameterUpdaterHook.cpp index 989185b66a..bfb9769fb6 100644 --- a/paddle/parameter/ParameterUpdaterHook.cpp +++ b/paddle/legacy/parameter/ParameterUpdaterHook.cpp @@ -22,10 +22,10 @@ limitations under the License. */ #include #include -#include "paddle/math/Vector.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/parameter/ParameterUpdaterHook.h b/paddle/legacy/parameter/ParameterUpdaterHook.h similarity index 100% rename from paddle/parameter/ParameterUpdaterHook.h rename to paddle/legacy/parameter/ParameterUpdaterHook.h diff --git a/paddle/parameter/Regularizer.cpp b/paddle/legacy/parameter/Regularizer.cpp similarity index 95% rename from paddle/parameter/Regularizer.cpp rename to paddle/legacy/parameter/Regularizer.cpp index d223fd2df6..c1d5f4fa68 100644 --- a/paddle/parameter/Regularizer.cpp +++ b/paddle/legacy/parameter/Regularizer.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Regularizer.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/parameter/Regularizer.h b/paddle/legacy/parameter/Regularizer.h similarity index 100% rename from paddle/parameter/Regularizer.h rename to paddle/legacy/parameter/Regularizer.h diff --git a/paddle/parameter/ThreadLocalBuffer.cpp b/paddle/legacy/parameter/ThreadLocalBuffer.cpp similarity index 100% rename from paddle/parameter/ThreadLocalBuffer.cpp rename to paddle/legacy/parameter/ThreadLocalBuffer.cpp diff --git a/paddle/parameter/ThreadLocalBuffer.h b/paddle/legacy/parameter/ThreadLocalBuffer.h similarity index 94% rename from paddle/parameter/ThreadLocalBuffer.h rename to paddle/legacy/parameter/ThreadLocalBuffer.h index 07c96e59d0..d360feeed6 100644 --- a/paddle/parameter/ThreadLocalBuffer.h +++ b/paddle/legacy/parameter/ThreadLocalBuffer.h @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once -#include "paddle/math/Vector.h" +#include "paddle/legacy/math/Vector.h" namespace paddle { namespace parameter { diff --git a/paddle/parameter/Weight.cpp b/paddle/legacy/parameter/Weight.cpp similarity index 98% rename from paddle/parameter/Weight.cpp rename to paddle/legacy/parameter/Weight.cpp index ba4ddce69f..9d94050a5c 100644 --- a/paddle/parameter/Weight.cpp +++ b/paddle/legacy/parameter/Weight.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "Weight.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/parameter/Weight.h b/paddle/legacy/parameter/Weight.h similarity index 90% rename from paddle/parameter/Weight.h rename to paddle/legacy/parameter/Weight.h index 113dd6530c..241c8d829c 100644 --- a/paddle/parameter/Weight.h +++ b/paddle/legacy/parameter/Weight.h @@ -16,9 +16,9 @@ limitations under the License. */ #include #include -#include "paddle/math/Matrix.h" -#include "paddle/math/SparseRowMatrix.h" -#include "paddle/parameter/Parameter.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/SparseRowMatrix.h" +#include "paddle/legacy/parameter/Parameter.h" namespace paddle { diff --git a/paddle/parameter/tests/CMakeLists.txt b/paddle/legacy/parameter/tests/CMakeLists.txt similarity index 100% rename from paddle/parameter/tests/CMakeLists.txt rename to paddle/legacy/parameter/tests/CMakeLists.txt diff --git a/paddle/parameter/tests/test_argument.cpp b/paddle/legacy/parameter/tests/test_argument.cpp similarity index 97% rename from paddle/parameter/tests/test_argument.cpp rename to paddle/legacy/parameter/tests/test_argument.cpp index 54ceb3e087..0c632e0cd1 100644 --- a/paddle/parameter/tests/test_argument.cpp +++ b/paddle/legacy/parameter/tests/test_argument.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include using namespace paddle; // NOLINT diff --git a/paddle/parameter/tests/test_common.cpp b/paddle/legacy/parameter/tests/test_common.cpp similarity index 96% rename from paddle/parameter/tests/test_common.cpp rename to paddle/legacy/parameter/tests/test_common.cpp index 89dcc6c751..8de9d6da98 100644 --- a/paddle/parameter/tests/test_common.cpp +++ b/paddle/legacy/parameter/tests/test_common.cpp @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace paddle; // NOLINT diff --git a/paddle/pserver/BaseClient.cpp b/paddle/legacy/pserver/BaseClient.cpp similarity index 98% rename from paddle/pserver/BaseClient.cpp rename to paddle/legacy/pserver/BaseClient.cpp index a6204ef47e..13bb8a1cc5 100644 --- a/paddle/pserver/BaseClient.cpp +++ b/paddle/legacy/pserver/BaseClient.cpp @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" DECLARE_string(pservers); diff --git a/paddle/pserver/BaseClient.h b/paddle/legacy/pserver/BaseClient.h similarity index 98% rename from paddle/pserver/BaseClient.h rename to paddle/legacy/pserver/BaseClient.h index d50230e73a..66e8f39cd6 100644 --- a/paddle/pserver/BaseClient.h +++ b/paddle/legacy/pserver/BaseClient.h @@ -15,10 +15,10 @@ limitations under the License. */ #pragma once #include "ParameterService.pb.h" -#include "paddle/math/Matrix.h" -#include "paddle/pserver/ProtoServer.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Queue.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/pserver/ProtoServer.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Queue.h" namespace paddle { diff --git a/paddle/pserver/CMakeLists.txt b/paddle/legacy/pserver/CMakeLists.txt similarity index 100% rename from paddle/pserver/CMakeLists.txt rename to paddle/legacy/pserver/CMakeLists.txt diff --git a/paddle/pserver/LightNetwork.cpp b/paddle/legacy/pserver/LightNetwork.cpp similarity index 99% rename from paddle/pserver/LightNetwork.cpp rename to paddle/legacy/pserver/LightNetwork.cpp index 4c0da2217e..469c95853e 100644 --- a/paddle/pserver/LightNetwork.cpp +++ b/paddle/legacy/pserver/LightNetwork.cpp @@ -27,8 +27,8 @@ limitations under the License. */ #include "LightNetwork.h" #include "RDMANetwork.h" -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/StringUtil.h" +#include "paddle/legacy/utils/Util.h" /// quick ack can reduce the latency of small message DEFINE_bool(small_messages, diff --git a/paddle/pserver/LightNetwork.h b/paddle/legacy/pserver/LightNetwork.h similarity index 99% rename from paddle/pserver/LightNetwork.h rename to paddle/legacy/pserver/LightNetwork.h index bcfc9655e9..380f86832f 100644 --- a/paddle/pserver/LightNetwork.h +++ b/paddle/legacy/pserver/LightNetwork.h @@ -21,7 +21,7 @@ limitations under the License. */ #include #include -#include "paddle/utils/Thread.h" +#include "paddle/legacy/utils/Thread.h" struct sxi_socket; diff --git a/paddle/pserver/ParameterClient2.cpp b/paddle/legacy/pserver/ParameterClient2.cpp similarity index 99% rename from paddle/pserver/ParameterClient2.cpp rename to paddle/legacy/pserver/ParameterClient2.cpp index 43e4902b0f..4c544ddc28 100644 --- a/paddle/pserver/ParameterClient2.cpp +++ b/paddle/legacy/pserver/ParameterClient2.cpp @@ -15,10 +15,10 @@ limitations under the License. */ #include #include "ParameterClient2.h" -#include "paddle/math/SparseRowMatrix.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/math/SparseRowMatrix.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/StringUtil.h" DEFINE_string(pservers, "127.0.0.1", "Comma separated addresses of pservers"); DEFINE_int32(parallel_thread_num, 1, "Thread number for parameter send"); diff --git a/paddle/pserver/ParameterClient2.h b/paddle/legacy/pserver/ParameterClient2.h similarity index 98% rename from paddle/pserver/ParameterClient2.h rename to paddle/legacy/pserver/ParameterClient2.h index c96bb78715..9320e19c4d 100644 --- a/paddle/pserver/ParameterClient2.h +++ b/paddle/legacy/pserver/ParameterClient2.h @@ -19,15 +19,15 @@ limitations under the License. */ #include #include -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/pserver/BaseClient.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/Queue.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/pserver/BaseClient.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Queue.h" +#include "paddle/legacy/utils/Util.h" #include "ParameterService.pb.h" diff --git a/paddle/pserver/ParameterServer2.cpp b/paddle/legacy/pserver/ParameterServer2.cpp similarity index 98% rename from paddle/pserver/ParameterServer2.cpp rename to paddle/legacy/pserver/ParameterServer2.cpp index f8814714c2..8533a322d9 100644 --- a/paddle/pserver/ParameterServer2.cpp +++ b/paddle/legacy/pserver/ParameterServer2.cpp @@ -17,19 +17,19 @@ limitations under the License. */ #include #include -#include "paddle/math/SIMDFunctions.h" -#include "paddle/parameter/AverageOptimizer.h" -#include "paddle/parameter/FirstOrderOptimizer.h" -#include "paddle/parameter/OptimizerFunctions.h" -#include "paddle/parameter/OptimizerWithRegularizer.h" -#include "paddle/parameter/ParameterOptimizer.h" -#include "paddle/parameter/ParameterUpdateFunctions.h" -#include "paddle/parameter/Regularizer.h" -#include "paddle/parameter/ThreadLocalBuffer.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/math/SIMDFunctions.h" +#include "paddle/legacy/parameter/AverageOptimizer.h" +#include "paddle/legacy/parameter/FirstOrderOptimizer.h" +#include "paddle/legacy/parameter/OptimizerFunctions.h" +#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" +#include "paddle/legacy/parameter/ParameterOptimizer.h" +#include "paddle/legacy/parameter/ParameterUpdateFunctions.h" +#include "paddle/legacy/parameter/Regularizer.h" +#include "paddle/legacy/parameter/ThreadLocalBuffer.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/StringUtil.h" DEFINE_int32(pserver_num_threads, 1, "number of threads for sync op exec"); DEFINE_double(async_lagged_ratio_min, diff --git a/paddle/pserver/ParameterServer2.h b/paddle/legacy/pserver/ParameterServer2.h similarity index 98% rename from paddle/pserver/ParameterServer2.h rename to paddle/legacy/pserver/ParameterServer2.h index 0b8ef5c170..069e730ea4 100644 --- a/paddle/pserver/ParameterServer2.h +++ b/paddle/legacy/pserver/ParameterServer2.h @@ -25,14 +25,14 @@ limitations under the License. */ #include #include -#include "paddle/math/Matrix.h" -#include "paddle/math/Vector.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/ParameterOptimizer.h" -#include "paddle/utils/Common.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/ThreadLocal.h" +#include "paddle/legacy/math/Matrix.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/ParameterOptimizer.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/ThreadLocal.h" #include "ParameterService.pb.h" diff --git a/paddle/pserver/ParameterServer2Main.cpp b/paddle/legacy/pserver/ParameterServer2Main.cpp similarity index 100% rename from paddle/pserver/ParameterServer2Main.cpp rename to paddle/legacy/pserver/ParameterServer2Main.cpp diff --git a/paddle/pserver/ParameterServerController.cpp b/paddle/legacy/pserver/ParameterServerController.cpp similarity index 100% rename from paddle/pserver/ParameterServerController.cpp rename to paddle/legacy/pserver/ParameterServerController.cpp diff --git a/paddle/pserver/ParameterServerController.h b/paddle/legacy/pserver/ParameterServerController.h similarity index 97% rename from paddle/pserver/ParameterServerController.h rename to paddle/legacy/pserver/ParameterServerController.h index 1308d62fb1..b90d0cbcea 100644 --- a/paddle/pserver/ParameterServerController.h +++ b/paddle/legacy/pserver/ParameterServerController.h @@ -17,7 +17,7 @@ limitations under the License. */ #include "ParameterServer2.h" #include "ParameterServerConfig.pb.h" #include "RDMANetwork.h" -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/utils/StringUtil.h" namespace paddle { diff --git a/paddle/pserver/ProtoServer.cpp b/paddle/legacy/pserver/ProtoServer.cpp similarity index 100% rename from paddle/pserver/ProtoServer.cpp rename to paddle/legacy/pserver/ProtoServer.cpp diff --git a/paddle/pserver/ProtoServer.h b/paddle/legacy/pserver/ProtoServer.h similarity index 100% rename from paddle/pserver/ProtoServer.h rename to paddle/legacy/pserver/ProtoServer.h diff --git a/paddle/pserver/RDMANetwork.h b/paddle/legacy/pserver/RDMANetwork.h similarity index 98% rename from paddle/pserver/RDMANetwork.h rename to paddle/legacy/pserver/RDMANetwork.h index 83db6b9df7..c87056f72c 100644 --- a/paddle/pserver/RDMANetwork.h +++ b/paddle/legacy/pserver/RDMANetwork.h @@ -19,7 +19,7 @@ limitations under the License. */ #else #define PROMPT_ERR() LOG(FATAL) << "Paddle is not compiled with rdma" #endif -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" #include struct sxi_sock; diff --git a/paddle/pserver/SocketChannel.cpp b/paddle/legacy/pserver/SocketChannel.cpp similarity index 99% rename from paddle/pserver/SocketChannel.cpp rename to paddle/legacy/pserver/SocketChannel.cpp index 72e6943408..79c763c62b 100644 --- a/paddle/pserver/SocketChannel.cpp +++ b/paddle/legacy/pserver/SocketChannel.cpp @@ -22,7 +22,7 @@ limitations under the License. */ #include #include "RDMANetwork.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/pserver/SocketChannel.h b/paddle/legacy/pserver/SocketChannel.h similarity index 98% rename from paddle/pserver/SocketChannel.h rename to paddle/legacy/pserver/SocketChannel.h index 8b45ac5609..a7b3cd42f0 100644 --- a/paddle/pserver/SocketChannel.h +++ b/paddle/legacy/pserver/SocketChannel.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include diff --git a/paddle/pserver/SparseParameterDistribution.cpp b/paddle/legacy/pserver/SparseParameterDistribution.cpp similarity index 97% rename from paddle/pserver/SparseParameterDistribution.cpp rename to paddle/legacy/pserver/SparseParameterDistribution.cpp index bb247f389c..3f17b228f0 100644 --- a/paddle/pserver/SparseParameterDistribution.cpp +++ b/paddle/legacy/pserver/SparseParameterDistribution.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Flags.h" +#include "paddle/legacy/utils/Flags.h" #include "SparseParameterDistribution.h" diff --git a/paddle/pserver/SparseParameterDistribution.h b/paddle/legacy/pserver/SparseParameterDistribution.h similarity index 97% rename from paddle/pserver/SparseParameterDistribution.h rename to paddle/legacy/pserver/SparseParameterDistribution.h index e168f36c75..ee78029958 100644 --- a/paddle/pserver/SparseParameterDistribution.h +++ b/paddle/legacy/pserver/SparseParameterDistribution.h @@ -16,7 +16,7 @@ limitations under the License. */ #include #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/pserver/test/.gitignore b/paddle/legacy/pserver/test/.gitignore similarity index 100% rename from paddle/pserver/test/.gitignore rename to paddle/legacy/pserver/test/.gitignore diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/legacy/pserver/test/CMakeLists.txt similarity index 100% rename from paddle/pserver/test/CMakeLists.txt rename to paddle/legacy/pserver/test/CMakeLists.txt diff --git a/paddle/pserver/test/SocketTest.cpp b/paddle/legacy/pserver/test/SocketTest.cpp similarity index 98% rename from paddle/pserver/test/SocketTest.cpp rename to paddle/legacy/pserver/test/SocketTest.cpp index 206cd17c37..3a781fcbf6 100644 --- a/paddle/pserver/test/SocketTest.cpp +++ b/paddle/legacy/pserver/test/SocketTest.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include @@ -22,8 +22,8 @@ limitations under the License. */ #include -#include "paddle/math/Vector.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/utils/Logging.h" struct MessageHeader { int64_t dataLength; diff --git a/paddle/pserver/test/test_ParameterServer2.cpp b/paddle/legacy/pserver/test/test_ParameterServer2.cpp similarity index 98% rename from paddle/pserver/test/test_ParameterServer2.cpp rename to paddle/legacy/pserver/test/test_ParameterServer2.cpp index 01d179258d..542e80e046 100644 --- a/paddle/pserver/test/test_ParameterServer2.cpp +++ b/paddle/legacy/pserver/test/test_ParameterServer2.cpp @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include -#include -#include -#include +#include +#include +#include +#include using namespace paddle; // NOLINT using namespace std; // NOLINT diff --git a/paddle/pserver/test/test_ProtoServer.cpp b/paddle/legacy/pserver/test/test_ProtoServer.cpp similarity index 96% rename from paddle/pserver/test/test_ProtoServer.cpp rename to paddle/legacy/pserver/test/test_ProtoServer.cpp index a66b14a1cc..f7ab2e8af4 100644 --- a/paddle/pserver/test/test_ProtoServer.cpp +++ b/paddle/legacy/pserver/test/test_ProtoServer.cpp @@ -15,10 +15,10 @@ limitations under the License. */ #include #include #include "ParameterService.pb.h" -#include "paddle/math/Vector.h" -#include "paddle/pserver/ProtoServer.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/math/Vector.h" +#include "paddle/legacy/pserver/ProtoServer.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" DEFINE_string(server_addr, "127.0.0.1", "Server address"); DEFINE_int64(dim, 50000000, "Data size"); diff --git a/paddle/pserver/test/test_ProtoServer.sh b/paddle/legacy/pserver/test/test_ProtoServer.sh similarity index 94% rename from paddle/pserver/test/test_ProtoServer.sh rename to paddle/legacy/pserver/test/test_ProtoServer.sh index 970c90b494..1439350847 100755 --- a/paddle/pserver/test/test_ProtoServer.sh +++ b/paddle/legacy/pserver/test/test_ProtoServer.sh @@ -19,7 +19,7 @@ do if [ $port_used_num -eq 0 ] then echo $port; - pserver/test/test_ProtoServer --port=$port + legacy/pserver/test/test_ProtoServer --port=$port if [ $? -eq 0 ] then exit 0 diff --git a/paddle/trainer/CMakeLists.txt b/paddle/legacy/trainer/CMakeLists.txt similarity index 100% rename from paddle/trainer/CMakeLists.txt rename to paddle/legacy/trainer/CMakeLists.txt diff --git a/paddle/trainer/MergeModel.cpp b/paddle/legacy/trainer/MergeModel.cpp similarity index 95% rename from paddle/trainer/MergeModel.cpp rename to paddle/legacy/trainer/MergeModel.cpp index 56c38015fb..8a3601f192 100644 --- a/paddle/trainer/MergeModel.cpp +++ b/paddle/legacy/trainer/MergeModel.cpp @@ -16,8 +16,8 @@ limitations under the License. */ #include "ParamUtil.h" #include "Trainer.h" -#include "paddle/pserver/ParameterServer2.h" -#include "paddle/utils/PythonUtil.h" +#include "paddle/legacy/pserver/ParameterServer2.h" +#include "paddle/legacy/utils/PythonUtil.h" DEFINE_string(model_dir, "", "Directory for separated model files"); DEFINE_string(config_file, "", "Config file for the model"); diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp b/paddle/legacy/trainer/NewRemoteParameterUpdater.cpp similarity index 99% rename from paddle/trainer/NewRemoteParameterUpdater.cpp rename to paddle/legacy/trainer/NewRemoteParameterUpdater.cpp index 410ac6d95c..cdd832acd1 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/legacy/trainer/NewRemoteParameterUpdater.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "NewRemoteParameterUpdater.h" #include "Trainer.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/Stat.h" DECLARE_int32(trainer_id); DECLARE_string(save_dir); diff --git a/paddle/trainer/NewRemoteParameterUpdater.h b/paddle/legacy/trainer/NewRemoteParameterUpdater.h similarity index 96% rename from paddle/trainer/NewRemoteParameterUpdater.h rename to paddle/legacy/trainer/NewRemoteParameterUpdater.h index 02693c675e..707e9ceb9b 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.h +++ b/paddle/legacy/trainer/NewRemoteParameterUpdater.h @@ -19,9 +19,9 @@ limitations under the License. */ #include "OptimizerConfig.pb.h" #include "ParameterUpdater.h" #include "libpaddle_pserver_cclient.h" -#include "paddle/pserver/ParameterClient2.h" -#include "paddle/utils/Queue.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/pserver/ParameterClient2.h" +#include "paddle/legacy/utils/Queue.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/trainer/ParamUtil.cpp b/paddle/legacy/trainer/ParamUtil.cpp similarity index 93% rename from paddle/trainer/ParamUtil.cpp rename to paddle/legacy/trainer/ParamUtil.cpp index ffbca42e10..b5aba32dee 100644 --- a/paddle/trainer/ParamUtil.cpp +++ b/paddle/legacy/trainer/ParamUtil.cpp @@ -23,16 +23,16 @@ limitations under the License. */ #include #include -#include +#include -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "TesterConfig.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/gserver/layers/ValidationLayer.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/layers/ValidationLayer.h" namespace paddle { diff --git a/paddle/trainer/ParamUtil.h b/paddle/legacy/trainer/ParamUtil.h similarity index 95% rename from paddle/trainer/ParamUtil.h rename to paddle/legacy/trainer/ParamUtil.h index 10746b4d58..0778696776 100644 --- a/paddle/trainer/ParamUtil.h +++ b/paddle/legacy/trainer/ParamUtil.h @@ -14,13 +14,13 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "hl_gpu.h" -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" #include #include diff --git a/paddle/trainer/ParameterUpdater.cpp b/paddle/legacy/trainer/ParameterUpdater.cpp similarity index 98% rename from paddle/trainer/ParameterUpdater.cpp rename to paddle/legacy/trainer/ParameterUpdater.cpp index 4e9e890c85..549fb0332d 100644 --- a/paddle/trainer/ParameterUpdater.cpp +++ b/paddle/legacy/trainer/ParameterUpdater.cpp @@ -14,9 +14,9 @@ limitations under the License. */ #include "ParameterUpdater.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/utils/Thread.h" +#include "paddle/legacy/utils/Thread.h" namespace paddle { diff --git a/paddle/trainer/ParameterUpdater.h b/paddle/legacy/trainer/ParameterUpdater.h similarity index 94% rename from paddle/trainer/ParameterUpdater.h rename to paddle/legacy/trainer/ParameterUpdater.h index ef7ab92eca..acddc3702d 100644 --- a/paddle/trainer/ParameterUpdater.h +++ b/paddle/legacy/trainer/ParameterUpdater.h @@ -14,18 +14,18 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Thread.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Thread.h" +#include "paddle/legacy/utils/Util.h" -#include "paddle/parameter/AverageOptimizer.h" -#include "paddle/parameter/FirstOrderOptimizer.h" -#include "paddle/parameter/OptimizerFunctions.h" -#include "paddle/parameter/OptimizerWithRegularizer.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/ParameterUpdaterBase.h" +#include "paddle/legacy/parameter/AverageOptimizer.h" +#include "paddle/legacy/parameter/FirstOrderOptimizer.h" +#include "paddle/legacy/parameter/OptimizerFunctions.h" +#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/ParameterUpdaterBase.h" #include "TrainerConfig.pb.h" -#include "paddle/gserver/layers/Layer.h" +#include "paddle/legacy/gserver/layers/Layer.h" #include #include diff --git a/paddle/trainer/RemoteParameterUpdater.cpp b/paddle/legacy/trainer/RemoteParameterUpdater.cpp similarity index 99% rename from paddle/trainer/RemoteParameterUpdater.cpp rename to paddle/legacy/trainer/RemoteParameterUpdater.cpp index 7314266cb2..5de1cc7827 100644 --- a/paddle/trainer/RemoteParameterUpdater.cpp +++ b/paddle/legacy/trainer/RemoteParameterUpdater.cpp @@ -14,8 +14,8 @@ limitations under the License. */ #include "RemoteParameterUpdater.h" #include "Trainer.h" -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/Stat.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/Stat.h" DECLARE_int32(trainer_id); DECLARE_string(save_dir); diff --git a/paddle/trainer/RemoteParameterUpdater.h b/paddle/legacy/trainer/RemoteParameterUpdater.h similarity index 99% rename from paddle/trainer/RemoteParameterUpdater.h rename to paddle/legacy/trainer/RemoteParameterUpdater.h index 3a40a46354..6846853298 100644 --- a/paddle/trainer/RemoteParameterUpdater.h +++ b/paddle/legacy/trainer/RemoteParameterUpdater.h @@ -17,9 +17,9 @@ limitations under the License. */ #include #include #include "ParameterUpdater.h" -#include "paddle/pserver/ParameterClient2.h" -#include "paddle/utils/Queue.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/pserver/ParameterClient2.h" +#include "paddle/legacy/utils/Queue.h" +#include "paddle/legacy/utils/Util.h" namespace paddle { diff --git a/paddle/trainer/Tester.cpp b/paddle/legacy/trainer/Tester.cpp similarity index 97% rename from paddle/trainer/Tester.cpp rename to paddle/legacy/trainer/Tester.cpp index 16e676d602..d977ca9657 100644 --- a/paddle/trainer/Tester.cpp +++ b/paddle/legacy/trainer/Tester.cpp @@ -24,15 +24,15 @@ limitations under the License. */ #include -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "TesterConfig.h" -#include "paddle/gserver/gradientmachines/GradientMachineMode.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/gserver/layers/ValidationLayer.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachineMode.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/layers/ValidationLayer.h" namespace paddle { diff --git a/paddle/trainer/Tester.h b/paddle/legacy/trainer/Tester.h similarity index 96% rename from paddle/trainer/Tester.h rename to paddle/legacy/trainer/Tester.h index 801c77e311..a298602d1d 100644 --- a/paddle/trainer/Tester.h +++ b/paddle/legacy/trainer/Tester.h @@ -14,13 +14,13 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "hl_gpu.h" -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" #include "TrainerConfig.pb.h" diff --git a/paddle/trainer/TesterConfig.h b/paddle/legacy/trainer/TesterConfig.h similarity index 94% rename from paddle/trainer/TesterConfig.h rename to paddle/legacy/trainer/TesterConfig.h index 68d4c931ff..6c78f7cda3 100644 --- a/paddle/trainer/TesterConfig.h +++ b/paddle/legacy/trainer/TesterConfig.h @@ -14,12 +14,12 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "hl_gpu.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" #include "TrainerConfig.pb.h" diff --git a/paddle/trainer/ThreadParameterUpdater.cpp b/paddle/legacy/trainer/ThreadParameterUpdater.cpp similarity index 98% rename from paddle/trainer/ThreadParameterUpdater.cpp rename to paddle/legacy/trainer/ThreadParameterUpdater.cpp index 3c85c3aaac..0601bdf24e 100644 --- a/paddle/trainer/ThreadParameterUpdater.cpp +++ b/paddle/legacy/trainer/ThreadParameterUpdater.cpp @@ -14,11 +14,11 @@ limitations under the License. */ #include "ThreadParameterUpdater.h" -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" -#include "paddle/math/SparseRowMatrix.h" -#include "paddle/parameter/ThreadLocalBuffer.h" -#include "paddle/utils/Thread.h" +#include "paddle/legacy/math/SparseRowMatrix.h" +#include "paddle/legacy/parameter/ThreadLocalBuffer.h" +#include "paddle/legacy/utils/Thread.h" DECLARE_int32(trainer_count); diff --git a/paddle/trainer/ThreadParameterUpdater.h b/paddle/legacy/trainer/ThreadParameterUpdater.h similarity index 88% rename from paddle/trainer/ThreadParameterUpdater.h rename to paddle/legacy/trainer/ThreadParameterUpdater.h index b5e6a7ce3c..172287d4eb 100644 --- a/paddle/trainer/ThreadParameterUpdater.h +++ b/paddle/legacy/trainer/ThreadParameterUpdater.h @@ -14,13 +14,13 @@ limitations under the License. */ #pragma once -#include "paddle/parameter/AverageOptimizer.h" -#include "paddle/parameter/FirstOrderOptimizer.h" -#include "paddle/parameter/OptimizerFunctions.h" -#include "paddle/parameter/OptimizerWithRegularizer.h" -#include "paddle/parameter/Parameter.h" -#include "paddle/parameter/Regularizer.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/parameter/AverageOptimizer.h" +#include "paddle/legacy/parameter/FirstOrderOptimizer.h" +#include "paddle/legacy/parameter/OptimizerFunctions.h" +#include "paddle/legacy/parameter/OptimizerWithRegularizer.h" +#include "paddle/legacy/parameter/Parameter.h" +#include "paddle/legacy/parameter/Regularizer.h" +#include "paddle/legacy/utils/Util.h" #include #include diff --git a/paddle/trainer/Trainer.cpp b/paddle/legacy/trainer/Trainer.cpp similarity index 98% rename from paddle/trainer/Trainer.cpp rename to paddle/legacy/trainer/Trainer.cpp index 3e4a2b5fa8..2db754793c 100644 --- a/paddle/trainer/Trainer.cpp +++ b/paddle/legacy/trainer/Trainer.cpp @@ -23,19 +23,19 @@ limitations under the License. */ #include -#include "paddle/utils/Common.h" -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Common.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "RemoteParameterUpdater.h" #include "TesterConfig.h" #include "ThreadParameterUpdater.h" #include "TrainerConfigHelper.h" -#include "paddle/gserver/gradientmachines/GradientMachineMode.h" -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/gserver/layers/ValidationLayer.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachineMode.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/layers/ValidationLayer.h" DEFINE_string(config, "", "Trainer config file"); diff --git a/paddle/trainer/Trainer.h b/paddle/legacy/trainer/Trainer.h similarity index 96% rename from paddle/trainer/Trainer.h rename to paddle/legacy/trainer/Trainer.h index 78127b7be5..b467f9af0c 100644 --- a/paddle/trainer/Trainer.h +++ b/paddle/legacy/trainer/Trainer.h @@ -14,13 +14,13 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "hl_gpu.h" -#include "paddle/gserver/dataproviders/DataProvider.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/dataproviders/DataProvider.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" #include #include diff --git a/paddle/trainer/TrainerBenchmark.cpp b/paddle/legacy/trainer/TrainerBenchmark.cpp similarity index 96% rename from paddle/trainer/TrainerBenchmark.cpp rename to paddle/legacy/trainer/TrainerBenchmark.cpp index 173653c816..7f5bd23354 100644 --- a/paddle/trainer/TrainerBenchmark.cpp +++ b/paddle/legacy/trainer/TrainerBenchmark.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #undef PADDLE_DISABLE_TIMER #include "Trainer.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" DECLARE_int32(test_period); diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/legacy/trainer/TrainerConfigHelper.cpp similarity index 98% rename from paddle/trainer/TrainerConfigHelper.cpp rename to paddle/legacy/trainer/TrainerConfigHelper.cpp index 2b68d89e48..4d31ba8d71 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/legacy/trainer/TrainerConfigHelper.cpp @@ -15,8 +15,8 @@ limitations under the License. */ #include "TrainerConfigHelper.h" #include "ParamUtil.h" #include "TrainerConfig.pb.h" -#include "paddle/utils/Flags.h" -#include "paddle/utils/PythonUtil.h" +#include "paddle/legacy/utils/Flags.h" +#include "paddle/legacy/utils/PythonUtil.h" DECLARE_string(config); DECLARE_string(init_model_path); diff --git a/paddle/trainer/TrainerConfigHelper.h b/paddle/legacy/trainer/TrainerConfigHelper.h similarity index 98% rename from paddle/trainer/TrainerConfigHelper.h rename to paddle/legacy/trainer/TrainerConfigHelper.h index b21dda964e..0e428bea2c 100644 --- a/paddle/trainer/TrainerConfigHelper.h +++ b/paddle/legacy/trainer/TrainerConfigHelper.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include -#include +#include +#include #include namespace paddle { diff --git a/paddle/trainer/TrainerInternal.cpp b/paddle/legacy/trainer/TrainerInternal.cpp similarity index 97% rename from paddle/trainer/TrainerInternal.cpp rename to paddle/legacy/trainer/TrainerInternal.cpp index 4c5d4a0913..ee3dea6340 100644 --- a/paddle/trainer/TrainerInternal.cpp +++ b/paddle/legacy/trainer/TrainerInternal.cpp @@ -24,12 +24,12 @@ limitations under the License. */ #include -#include "paddle/gserver/gradientmachines/NeuralNetwork.h" -#include "paddle/gserver/layers/ValidationLayer.h" -#include "paddle/utils/GlobalConstants.h" -#include "paddle/utils/PythonUtil.h" -#include "paddle/utils/Stat.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/gserver/gradientmachines/NeuralNetwork.h" +#include "paddle/legacy/gserver/layers/ValidationLayer.h" +#include "paddle/legacy/utils/GlobalConstants.h" +#include "paddle/legacy/utils/PythonUtil.h" +#include "paddle/legacy/utils/Stat.h" +#include "paddle/legacy/utils/Util.h" #include "RemoteParameterUpdater.h" #include "ThreadParameterUpdater.h" diff --git a/paddle/trainer/TrainerInternal.h b/paddle/legacy/trainer/TrainerInternal.h similarity index 97% rename from paddle/trainer/TrainerInternal.h rename to paddle/legacy/trainer/TrainerInternal.h index 48ee53a5e6..93919a68fc 100644 --- a/paddle/trainer/TrainerInternal.h +++ b/paddle/legacy/trainer/TrainerInternal.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include @@ -25,7 +25,7 @@ limitations under the License. */ #include "TrainerConfigHelper.h" #include "TrainerInternalConfig.h" #include "hl_gpu.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" namespace paddle { diff --git a/paddle/trainer/TrainerInternalConfig.cpp b/paddle/legacy/trainer/TrainerInternalConfig.cpp similarity index 100% rename from paddle/trainer/TrainerInternalConfig.cpp rename to paddle/legacy/trainer/TrainerInternalConfig.cpp diff --git a/paddle/trainer/TrainerInternalConfig.h b/paddle/legacy/trainer/TrainerInternalConfig.h similarity index 98% rename from paddle/trainer/TrainerInternalConfig.h rename to paddle/legacy/trainer/TrainerInternalConfig.h index 43aae38102..b91b539323 100644 --- a/paddle/trainer/TrainerInternalConfig.h +++ b/paddle/legacy/trainer/TrainerInternalConfig.h @@ -14,12 +14,12 @@ limitations under the License. */ #pragma once -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #include #include "hl_gpu.h" -#include "paddle/gserver/gradientmachines/GradientMachine.h" +#include "paddle/legacy/gserver/gradientmachines/GradientMachine.h" #include "TrainerConfig.pb.h" diff --git a/paddle/trainer/TrainerMain.cpp b/paddle/legacy/trainer/TrainerMain.cpp similarity index 94% rename from paddle/trainer/TrainerMain.cpp rename to paddle/legacy/trainer/TrainerMain.cpp index c5c1d484e5..911aeba192 100644 --- a/paddle/trainer/TrainerMain.cpp +++ b/paddle/legacy/trainer/TrainerMain.cpp @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/pserver/ParameterServerController.h" -#include "paddle/utils/PythonUtil.h" +#include "paddle/legacy/pserver/ParameterServerController.h" +#include "paddle/legacy/utils/PythonUtil.h" #include "ParamUtil.h" #include "Trainer.h" diff --git a/paddle/trainer/tests/.gitignore b/paddle/legacy/trainer/tests/.gitignore similarity index 100% rename from paddle/trainer/tests/.gitignore rename to paddle/legacy/trainer/tests/.gitignore diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/legacy/trainer/tests/CMakeLists.txt similarity index 89% rename from paddle/trainer/tests/CMakeLists.txt rename to paddle/legacy/trainer/tests/CMakeLists.txt index 12c9ea8cef..08548bea4c 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/legacy/trainer/tests/CMakeLists.txt @@ -5,7 +5,7 @@ add_custom_target(copy_trainer_conf ALL DEPENDS sample_trainer_config.conf) set(PYTHON_PATH ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d - ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/trainer/tests) + ${PADDLE_BINARY_DIR}/python/:${PADDLE_BINARY_DIR}/paddle/legacy/trainer/tests) function(trainer_test TARGET) add_unittest_without_exec(${TARGET} ${TARGET}.cpp) add_test(NAME ${TARGET} @@ -33,5 +33,5 @@ endif() #################### test_config_parser ######################### add_test(NAME test_config_parser COMMAND ${PYTHON_PATH} ${PYTHON_EXECUTABLE} - ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py + ${PADDLE_SOURCE_DIR}/paddle/legacy/trainer/tests/config_parser_test.py WORKING_DIRECTORY ${PADDLE_BINARY_DIR}/paddle/) diff --git a/paddle/trainer/tests/__init__.py b/paddle/legacy/trainer/tests/__init__.py similarity index 100% rename from paddle/trainer/tests/__init__.py rename to paddle/legacy/trainer/tests/__init__.py diff --git a/paddle/trainer/tests/config_parser_test.py b/paddle/legacy/trainer/tests/config_parser_test.py similarity index 77% rename from paddle/trainer/tests/config_parser_test.py rename to paddle/legacy/trainer/tests/config_parser_test.py index db66ebb5b7..0d3d82cbda 100644 --- a/paddle/trainer/tests/config_parser_test.py +++ b/paddle/legacy/trainer/tests/config_parser_test.py @@ -15,8 +15,9 @@ from paddle.trainer.config_parser import parse_config_and_serialize if __name__ == '__main__': - parse_config_and_serialize('trainer/tests/test_config.conf', '') + parse_config_and_serialize('legacy/trainer/tests/test_config.conf', '') parse_config_and_serialize( - 'trainer/tests/sample_trainer_config.conf', + 'legacy/trainer/tests/sample_trainer_config.conf', 'extension_module_name=paddle.trainer.config_parser_extension') - parse_config_and_serialize('gserver/tests/pyDataProvider/trainer.conf', '') + parse_config_and_serialize( + 'legacy/gserver/tests/pyDataProvider/trainer.conf', '') diff --git a/paddle/trainer/tests/fake_file_list.list b/paddle/legacy/trainer/tests/fake_file_list.list similarity index 100% rename from paddle/trainer/tests/fake_file_list.list rename to paddle/legacy/trainer/tests/fake_file_list.list diff --git a/paddle/trainer/tests/picojson.h b/paddle/legacy/trainer/tests/picojson.h similarity index 100% rename from paddle/trainer/tests/picojson.h rename to paddle/legacy/trainer/tests/picojson.h diff --git a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data b/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data similarity index 100% rename from paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data rename to paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data diff --git a/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list b/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list new file mode 100644 index 0000000000..11c1b1b38b --- /dev/null +++ b/paddle/legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list @@ -0,0 +1 @@ +legacy/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data diff --git a/paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.beam b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.beam similarity index 100% rename from paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.beam rename to paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.beam diff --git a/paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nest b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nest similarity index 100% rename from paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nest rename to paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nest diff --git a/paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam similarity index 100% rename from paddle/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam rename to paddle/legacy/trainer/tests/rnn_gen_test_model_dir/r1.test.nobeam diff --git a/paddle/trainer/tests/rnn_gen_test_model_dir/t1/transtable b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/transtable similarity index 100% rename from paddle/trainer/tests/rnn_gen_test_model_dir/t1/transtable rename to paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/transtable diff --git a/paddle/trainer/tests/rnn_gen_test_model_dir/t1/wordvec b/paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/wordvec similarity index 100% rename from paddle/trainer/tests/rnn_gen_test_model_dir/t1/wordvec rename to paddle/legacy/trainer/tests/rnn_gen_test_model_dir/t1/wordvec diff --git a/paddle/trainer/tests/sample_data.txt b/paddle/legacy/trainer/tests/sample_data.txt similarity index 100% rename from paddle/trainer/tests/sample_data.txt rename to paddle/legacy/trainer/tests/sample_data.txt diff --git a/paddle/legacy/trainer/tests/sample_filelist.txt b/paddle/legacy/trainer/tests/sample_filelist.txt new file mode 100644 index 0000000000..8573f9e179 --- /dev/null +++ b/paddle/legacy/trainer/tests/sample_filelist.txt @@ -0,0 +1 @@ +legacy/trainer/tests/sample_data.txt diff --git a/paddle/trainer/tests/sample_trainer_config.conf b/paddle/legacy/trainer/tests/sample_trainer_config.conf similarity index 95% rename from paddle/trainer/tests/sample_trainer_config.conf rename to paddle/legacy/trainer/tests/sample_trainer_config.conf index 2697832840..5800b36256 100644 --- a/paddle/trainer/tests/sample_trainer_config.conf +++ b/paddle/legacy/trainer/tests/sample_trainer_config.conf @@ -16,13 +16,13 @@ from paddle.trainer_config_helpers import * TrainData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000)) TestData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000)) diff --git a/paddle/trainer/tests/sample_trainer_config_hsigmoid.conf b/paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf similarity index 96% rename from paddle/trainer/tests/sample_trainer_config_hsigmoid.conf rename to paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf index e4abe31d48..155c40b31f 100644 --- a/paddle/trainer/tests/sample_trainer_config_hsigmoid.conf +++ b/paddle/legacy/trainer/tests/sample_trainer_config_hsigmoid.conf @@ -17,7 +17,7 @@ from paddle.trainer_config_helpers import * TrainData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000, diff --git a/paddle/trainer/tests/sample_trainer_config_parallel.conf b/paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf similarity index 95% rename from paddle/trainer/tests/sample_trainer_config_parallel.conf rename to paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf index e2b8b3ecda..49cdde7fa2 100644 --- a/paddle/trainer/tests/sample_trainer_config_parallel.conf +++ b/paddle/legacy/trainer/tests/sample_trainer_config_parallel.conf @@ -16,13 +16,13 @@ from paddle.trainer_config_helpers import * TrainData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000)) TestData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000)) diff --git a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf b/paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf similarity index 94% rename from paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf rename to paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf index 741a0aa71d..51ef905a5a 100644 --- a/paddle/trainer/tests/sample_trainer_nest_rnn_gen.conf +++ b/paddle/legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf @@ -63,8 +63,8 @@ beam_gen_concat = recurrent_group(name="rnn_gen_concat", seqtext_printer_evaluator(input=beam_gen_concat, id_input=sent_id, - dict_file="./trainer/tests/test_gen_dict.txt", - result_file="./trainer/tests/dump_text.test") + dict_file="./legacy/trainer/tests/test_gen_dict.txt", + result_file="./legacy/trainer/tests/dump_text.test") #outputs(beam_gen_concat) # In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory # is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs diff --git a/paddle/trainer/tests/sample_trainer_rnn_gen.conf b/paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf similarity index 94% rename from paddle/trainer/tests/sample_trainer_rnn_gen.conf rename to paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf index 58d27f15ae..35c7f0fcd9 100644 --- a/paddle/trainer/tests/sample_trainer_rnn_gen.conf +++ b/paddle/legacy/trainer/tests/sample_trainer_rnn_gen.conf @@ -56,8 +56,8 @@ beam_gen = beam_search(name="rnn_gen", seqtext_printer_evaluator(input=beam_gen, id_input=sent_id, - dict_file="./trainer/tests/test_gen_dict.txt", - result_file="./trainer/tests/dump_text.test") + dict_file="./legacy/trainer/tests/test_gen_dict.txt", + result_file="./legacy/trainer/tests/dump_text.test") #outputs(beam_gen) # In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory # is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs diff --git a/paddle/trainer/tests/simple_sparse_neural_network.py b/paddle/legacy/trainer/tests/simple_sparse_neural_network.py similarity index 95% rename from paddle/trainer/tests/simple_sparse_neural_network.py rename to paddle/legacy/trainer/tests/simple_sparse_neural_network.py index 970fb466dc..9419f4d903 100644 --- a/paddle/trainer/tests/simple_sparse_neural_network.py +++ b/paddle/legacy/trainer/tests/simple_sparse_neural_network.py @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * settings(batch_size=17, learning_method=AdaGradOptimizer(), learning_rate=1e-4) -file_list = 'trainer/tests/fake_file_list.list' +file_list = 'legacy/trainer/tests/fake_file_list.list' define_py_data_sources2( train_list=file_list, diff --git a/paddle/trainer/tests/simple_sparse_neural_network_dp.py b/paddle/legacy/trainer/tests/simple_sparse_neural_network_dp.py similarity index 100% rename from paddle/trainer/tests/simple_sparse_neural_network_dp.py rename to paddle/legacy/trainer/tests/simple_sparse_neural_network_dp.py diff --git a/paddle/trainer/tests/testPyDataWrapper.py b/paddle/legacy/trainer/tests/testPyDataWrapper.py similarity index 100% rename from paddle/trainer/tests/testPyDataWrapper.py rename to paddle/legacy/trainer/tests/testPyDataWrapper.py diff --git a/paddle/trainer/tests/test_Compare.cpp b/paddle/legacy/trainer/tests/test_Compare.cpp similarity index 96% rename from paddle/trainer/tests/test_Compare.cpp rename to paddle/legacy/trainer/tests/test_Compare.cpp index f3a964acb6..e37e546be8 100644 --- a/paddle/trainer/tests/test_Compare.cpp +++ b/paddle/legacy/trainer/tests/test_Compare.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include +#include -#include "paddle/trainer/Trainer.h" +#include "paddle/legacy/trainer/Trainer.h" #include #include @@ -22,7 +22,8 @@ limitations under the License. */ using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& configFile = "trainer/tests/sample_trainer_config.conf"; +static const string& configFile = + "legacy/trainer/tests/sample_trainer_config.conf"; DECLARE_int32(gpu_id); DECLARE_bool(use_gpu); diff --git a/paddle/trainer/tests/test_PyDataProviderWrapper.cpp b/paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp similarity index 96% rename from paddle/trainer/tests/test_PyDataProviderWrapper.cpp rename to paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp index 92dc8aa9ec..847adcfaba 100644 --- a/paddle/trainer/tests/test_PyDataProviderWrapper.cpp +++ b/paddle/legacy/trainer/tests/test_PyDataProviderWrapper.cpp @@ -15,10 +15,10 @@ limitations under the License. */ #ifndef PADDLE_NO_PYTHON #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include @@ -26,7 +26,7 @@ limitations under the License. */ #include "picojson.h" void checkValue(std::vector& arguments, picojson::array& arr); -const std::string kDir = "./trainer/tests/pydata_provider_wrapper_dir/"; +const std::string kDir = "./legacy/trainer/tests/pydata_provider_wrapper_dir/"; TEST(PyDataProviderWrapper, SequenceData) { paddle::DataConfig conf; diff --git a/paddle/trainer/tests/test_Trainer.cpp b/paddle/legacy/trainer/tests/test_Trainer.cpp similarity index 88% rename from paddle/trainer/tests/test_Trainer.cpp rename to paddle/legacy/trainer/tests/test_Trainer.cpp index 394038cf73..14ad0a2652 100644 --- a/paddle/trainer/tests/test_Trainer.cpp +++ b/paddle/legacy/trainer/tests/test_Trainer.cpp @@ -12,20 +12,21 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include "paddle/trainer/Trainer.h" +#include +#include +#include "paddle/legacy/trainer/Trainer.h" #include using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& configFile1 = "trainer/tests/sample_trainer_config.conf"; +static const string& configFile1 = + "legacy/trainer/tests/sample_trainer_config.conf"; static const string& configFile2 = - "trainer/tests/sample_trainer_config_hsigmoid.conf"; + "legacy/trainer/tests/sample_trainer_config_hsigmoid.conf"; static const string& configFile4 = - "trainer/tests/sample_trainer_config_parallel.conf"; + "legacy/trainer/tests/sample_trainer_config_parallel.conf"; DECLARE_bool(use_gpu); DECLARE_string(config); diff --git a/paddle/trainer/tests/test_TrainerOnePass.cpp b/paddle/legacy/trainer/tests/test_TrainerOnePass.cpp similarity index 95% rename from paddle/trainer/tests/test_TrainerOnePass.cpp rename to paddle/legacy/trainer/tests/test_TrainerOnePass.cpp index de12c4d649..3e5c5ea723 100644 --- a/paddle/trainer/tests/test_TrainerOnePass.cpp +++ b/paddle/legacy/trainer/tests/test_TrainerOnePass.cpp @@ -12,23 +12,24 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include -#include "paddle/trainer/Trainer.h" -#include "paddle/trainer/TrainerInternal.h" +#include +#include +#include "paddle/legacy/trainer/Trainer.h" +#include "paddle/legacy/trainer/TrainerInternal.h" #include -#include +#include using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& configFile1 = "trainer/tests/sample_trainer_config.conf"; +static const string& configFile1 = + "legacy/trainer/tests/sample_trainer_config.conf"; static const string& configFile2 = - "trainer/tests/sample_trainer_config_parallel.conf"; + "legacy/trainer/tests/sample_trainer_config_parallel.conf"; static const string& configFileSimpleSparse = - "trainer/tests/simple_sparse_neural_network.py"; + "legacy/trainer/tests/simple_sparse_neural_network.py"; DECLARE_bool(use_gpu); DECLARE_string(config); diff --git a/paddle/trainer/tests/test_config.conf b/paddle/legacy/trainer/tests/test_config.conf similarity index 97% rename from paddle/trainer/tests/test_config.conf rename to paddle/legacy/trainer/tests/test_config.conf index 2f86aaa753..bce687ad83 100644 --- a/paddle/trainer/tests/test_config.conf +++ b/paddle/legacy/trainer/tests/test_config.conf @@ -16,7 +16,7 @@ from paddle.trainer_config_helpers import * TrainData(SimpleData( - files = "trainer/tests/sample_filelist.txt", + files = "legacy/trainer/tests/sample_filelist.txt", feat_dim = 3, context_len = 0, buffer_capacity = 1000000, diff --git a/paddle/trainer/tests/test_gen_dict.txt b/paddle/legacy/trainer/tests/test_gen_dict.txt similarity index 100% rename from paddle/trainer/tests/test_gen_dict.txt rename to paddle/legacy/trainer/tests/test_gen_dict.txt diff --git a/paddle/trainer/tests/test_recurrent_machine_generation.cpp b/paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp similarity index 90% rename from paddle/trainer/tests/test_recurrent_machine_generation.cpp rename to paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp index a8fbe31c2b..47b4e82cd3 100644 --- a/paddle/trainer/tests/test_recurrent_machine_generation.cpp +++ b/paddle/legacy/trainer/tests/test_recurrent_machine_generation.cpp @@ -14,21 +14,23 @@ limitations under the License. */ #include -#include -#include +#include +#include #include using namespace paddle; // NOLINT using namespace std; // NOLINT -static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf"; +static const string& CONFIG_FILE = + "legacy/trainer/tests/sample_trainer_rnn_gen.conf"; static const string& NEST_CONFIG_FILE = - "trainer/tests/sample_trainer_nest_rnn_gen.conf"; -static const string& OUTPUT_DIR = "trainer/tests/dump_text.test"; -static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT -static string expectFile = // NOLINT - "trainer/tests/rnn_gen_test_model_dir/r1.test"; // NOLINT + "legacy/trainer/tests/sample_trainer_nest_rnn_gen.conf"; +static const string& OUTPUT_DIR = "legacy/trainer/tests/dump_text.test"; +static string modelDir = + "legacy/trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT +static string expectFile = // NOLINT + "legacy/trainer/tests/rnn_gen_test_model_dir/r1.test"; // NOLINT DECLARE_string(config_args); diff --git a/paddle/utils/.gitignore b/paddle/legacy/utils/.gitignore similarity index 100% rename from paddle/utils/.gitignore rename to paddle/legacy/utils/.gitignore diff --git a/paddle/utils/Any.h b/paddle/legacy/utils/Any.h similarity index 100% rename from paddle/utils/Any.h rename to paddle/legacy/utils/Any.h diff --git a/paddle/utils/CMakeLists.txt b/paddle/legacy/utils/CMakeLists.txt similarity index 100% rename from paddle/utils/CMakeLists.txt rename to paddle/legacy/utils/CMakeLists.txt diff --git a/paddle/utils/ClassRegistrar.h b/paddle/legacy/utils/ClassRegistrar.h similarity index 100% rename from paddle/utils/ClassRegistrar.h rename to paddle/legacy/utils/ClassRegistrar.h diff --git a/paddle/utils/Common.h b/paddle/legacy/utils/Common.h similarity index 100% rename from paddle/utils/Common.h rename to paddle/legacy/utils/Common.h diff --git a/paddle/utils/CpuId.cpp b/paddle/legacy/utils/CpuId.cpp similarity index 96% rename from paddle/utils/CpuId.cpp rename to paddle/legacy/utils/CpuId.cpp index 7186feef04..66e7c6606f 100644 --- a/paddle/utils/CpuId.cpp +++ b/paddle/legacy/utils/CpuId.cpp @@ -9,8 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/CpuId.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/CpuId.h" +#include "paddle/legacy/utils/Util.h" #ifdef _WIN32 diff --git a/paddle/utils/CpuId.h b/paddle/legacy/utils/CpuId.h similarity index 100% rename from paddle/utils/CpuId.h rename to paddle/legacy/utils/CpuId.h diff --git a/paddle/utils/CustomStackTrace.cpp b/paddle/legacy/utils/CustomStackTrace.cpp similarity index 100% rename from paddle/utils/CustomStackTrace.cpp rename to paddle/legacy/utils/CustomStackTrace.cpp diff --git a/paddle/utils/CustomStackTrace.h b/paddle/legacy/utils/CustomStackTrace.h similarity index 100% rename from paddle/utils/CustomStackTrace.h rename to paddle/legacy/utils/CustomStackTrace.h diff --git a/paddle/utils/DynamicLoader.cpp b/paddle/legacy/utils/DynamicLoader.cpp similarity index 100% rename from paddle/utils/DynamicLoader.cpp rename to paddle/legacy/utils/DynamicLoader.cpp diff --git a/paddle/utils/DynamicLoader.h b/paddle/legacy/utils/DynamicLoader.h similarity index 100% rename from paddle/utils/DynamicLoader.h rename to paddle/legacy/utils/DynamicLoader.h diff --git a/paddle/utils/Error.h b/paddle/legacy/utils/Error.h similarity index 100% rename from paddle/utils/Error.h rename to paddle/legacy/utils/Error.h diff --git a/paddle/utils/Excepts.h b/paddle/legacy/utils/Excepts.h similarity index 100% rename from paddle/utils/Excepts.h rename to paddle/legacy/utils/Excepts.h diff --git a/paddle/utils/Flags.cpp b/paddle/legacy/utils/Flags.cpp similarity index 100% rename from paddle/utils/Flags.cpp rename to paddle/legacy/utils/Flags.cpp diff --git a/paddle/utils/Flags.h b/paddle/legacy/utils/Flags.h similarity index 100% rename from paddle/utils/Flags.h rename to paddle/legacy/utils/Flags.h diff --git a/paddle/utils/GlobalConstants.cpp b/paddle/legacy/utils/GlobalConstants.cpp similarity index 100% rename from paddle/utils/GlobalConstants.cpp rename to paddle/legacy/utils/GlobalConstants.cpp diff --git a/paddle/utils/GlobalConstants.h b/paddle/legacy/utils/GlobalConstants.h similarity index 100% rename from paddle/utils/GlobalConstants.h rename to paddle/legacy/utils/GlobalConstants.h diff --git a/paddle/utils/Locks.h b/paddle/legacy/utils/Locks.h similarity index 100% rename from paddle/utils/Locks.h rename to paddle/legacy/utils/Locks.h diff --git a/paddle/utils/Logging.cpp b/paddle/legacy/utils/Logging.cpp similarity index 100% rename from paddle/utils/Logging.cpp rename to paddle/legacy/utils/Logging.cpp diff --git a/paddle/utils/Logging.h b/paddle/legacy/utils/Logging.h similarity index 100% rename from paddle/utils/Logging.h rename to paddle/legacy/utils/Logging.h diff --git a/paddle/utils/PythonUtil.cpp b/paddle/legacy/utils/PythonUtil.cpp similarity index 100% rename from paddle/utils/PythonUtil.cpp rename to paddle/legacy/utils/PythonUtil.cpp diff --git a/paddle/utils/PythonUtil.h b/paddle/legacy/utils/PythonUtil.h similarity index 99% rename from paddle/utils/PythonUtil.h rename to paddle/legacy/utils/PythonUtil.h index 6f8d7e0930..b0c8612c37 100644 --- a/paddle/utils/PythonUtil.h +++ b/paddle/legacy/utils/PythonUtil.h @@ -14,7 +14,7 @@ limitations under the License. */ #pragma once // clang-format off -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" #ifndef PADDLE_NO_PYTHON // must include the following two blocks, otherwise, diff --git a/paddle/utils/Queue.h b/paddle/legacy/utils/Queue.h similarity index 100% rename from paddle/utils/Queue.h rename to paddle/legacy/utils/Queue.h diff --git a/paddle/utils/Stat.cpp b/paddle/legacy/utils/Stat.cpp similarity index 100% rename from paddle/utils/Stat.cpp rename to paddle/legacy/utils/Stat.cpp diff --git a/paddle/utils/Stat.h b/paddle/legacy/utils/Stat.h similarity index 100% rename from paddle/utils/Stat.h rename to paddle/legacy/utils/Stat.h diff --git a/paddle/utils/StringUtil.cpp b/paddle/legacy/utils/StringUtil.cpp similarity index 100% rename from paddle/utils/StringUtil.cpp rename to paddle/legacy/utils/StringUtil.cpp diff --git a/paddle/utils/StringUtil.h b/paddle/legacy/utils/StringUtil.h similarity index 100% rename from paddle/utils/StringUtil.h rename to paddle/legacy/utils/StringUtil.h diff --git a/paddle/utils/Thread.h b/paddle/legacy/utils/Thread.h similarity index 100% rename from paddle/utils/Thread.h rename to paddle/legacy/utils/Thread.h diff --git a/paddle/utils/ThreadLocal.cpp b/paddle/legacy/utils/ThreadLocal.cpp similarity index 100% rename from paddle/utils/ThreadLocal.cpp rename to paddle/legacy/utils/ThreadLocal.cpp diff --git a/paddle/utils/ThreadLocal.h b/paddle/legacy/utils/ThreadLocal.h similarity index 100% rename from paddle/utils/ThreadLocal.h rename to paddle/legacy/utils/ThreadLocal.h diff --git a/paddle/utils/Util.cpp b/paddle/legacy/utils/Util.cpp similarity index 100% rename from paddle/utils/Util.cpp rename to paddle/legacy/utils/Util.cpp diff --git a/paddle/utils/Util.h b/paddle/legacy/utils/Util.h similarity index 100% rename from paddle/utils/Util.h rename to paddle/legacy/utils/Util.h diff --git a/paddle/utils/Version.cpp b/paddle/legacy/utils/Version.cpp similarity index 100% rename from paddle/utils/Version.cpp rename to paddle/legacy/utils/Version.cpp diff --git a/paddle/utils/Version.h b/paddle/legacy/utils/Version.h similarity index 100% rename from paddle/utils/Version.h rename to paddle/legacy/utils/Version.h diff --git a/paddle/utils/arch/linux/Locks.cpp b/paddle/legacy/utils/arch/linux/Locks.cpp similarity index 97% rename from paddle/utils/arch/linux/Locks.cpp rename to paddle/legacy/utils/arch/linux/Locks.cpp index 409af8bce3..32d351e332 100644 --- a/paddle/utils/arch/linux/Locks.cpp +++ b/paddle/legacy/utils/arch/linux/Locks.cpp @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Locks.h" +#include "paddle/legacy/utils/Locks.h" #include #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { class SemaphorePrivate { diff --git a/paddle/utils/arch/osx/Excepts.cpp b/paddle/legacy/utils/arch/osx/Excepts.cpp similarity index 97% rename from paddle/utils/arch/osx/Excepts.cpp rename to paddle/legacy/utils/arch/osx/Excepts.cpp index ac44461578..2b7d6dca84 100644 --- a/paddle/utils/arch/osx/Excepts.cpp +++ b/paddle/legacy/utils/arch/osx/Excepts.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Excepts.h" +#include "paddle/legacy/utils/Excepts.h" #if defined(__APPLE__) || defined(__OSX__) #if defined(__arm__) || defined(__arm64__) diff --git a/paddle/utils/arch/osx/Locks.cpp b/paddle/legacy/utils/arch/osx/Locks.cpp similarity index 97% rename from paddle/utils/arch/osx/Locks.cpp rename to paddle/legacy/utils/arch/osx/Locks.cpp index f3905091bd..b68c48f0c3 100644 --- a/paddle/utils/arch/osx/Locks.cpp +++ b/paddle/legacy/utils/arch/osx/Locks.cpp @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Locks.h" +#include "paddle/legacy/utils/Locks.h" #include #include #include -#include "paddle/utils/Logging.h" +#include "paddle/legacy/utils/Logging.h" namespace paddle { diff --git a/paddle/utils/enable_virtualenv.py b/paddle/legacy/utils/enable_virtualenv.py similarity index 100% rename from paddle/utils/enable_virtualenv.py rename to paddle/legacy/utils/enable_virtualenv.py diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/legacy/utils/tests/CMakeLists.txt similarity index 84% rename from paddle/utils/tests/CMakeLists.txt rename to paddle/legacy/utils/tests/CMakeLists.txt index c770ce1698..4af01db5c8 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/legacy/utils/tests/CMakeLists.txt @@ -13,6 +13,6 @@ add_executable( link_paddle_exe(test_CustomStackTracePrint) if(NOT APPLE) add_test(NAME test_CustomStackTracePrint - COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh + COMMAND ${PADDLE_SOURCE_DIR}/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/paddle/utils/tests/test_CustomStackTrace.cpp b/paddle/legacy/utils/tests/test_CustomStackTrace.cpp similarity index 94% rename from paddle/utils/tests/test_CustomStackTrace.cpp rename to paddle/legacy/utils/tests/test_CustomStackTrace.cpp index 4d5540b24c..2a418e3ae2 100644 --- a/paddle/utils/tests/test_CustomStackTrace.cpp +++ b/paddle/legacy/utils/tests/test_CustomStackTrace.cpp @@ -15,10 +15,10 @@ limitations under the License. */ #include // NOLINT #include // NOLINT -#include "paddle/utils/CustomStackTrace.h" -#include "paddle/utils/Locks.h" -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/CustomStackTrace.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/StringUtil.h" +#include "paddle/legacy/utils/Util.h" DEFINE_int32(test_thread_num, 10, "testing thread number"); diff --git a/paddle/utils/tests/test_CustomStackTracePrint.cpp b/paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp similarity index 86% rename from paddle/utils/tests/test_CustomStackTracePrint.cpp rename to paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp index 360c61c88a..78886a3ed9 100644 --- a/paddle/utils/tests/test_CustomStackTracePrint.cpp +++ b/paddle/legacy/utils/tests/test_CustomStackTracePrint.cpp @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/CustomStackTrace.h" -#include "paddle/utils/StringUtil.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/CustomStackTrace.h" +#include "paddle/legacy/utils/StringUtil.h" +#include "paddle/legacy/utils/Util.h" int main(int argc, char** argv) { paddle::initMain(argc, argv); diff --git a/paddle/utils/tests/test_CustomStackTracePrint.sh b/paddle/legacy/utils/tests/test_CustomStackTracePrint.sh similarity index 100% rename from paddle/utils/tests/test_CustomStackTracePrint.sh rename to paddle/legacy/utils/tests/test_CustomStackTracePrint.sh diff --git a/paddle/utils/tests/test_Error.cpp b/paddle/legacy/utils/tests/test_Error.cpp similarity index 96% rename from paddle/utils/tests/test_Error.cpp rename to paddle/legacy/utils/tests/test_Error.cpp index 6f311fa6b8..250c4d58a6 100644 --- a/paddle/utils/tests/test_Error.cpp +++ b/paddle/legacy/utils/tests/test_Error.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/Error.h" +#include "paddle/legacy/utils/Error.h" #include diff --git a/paddle/utils/tests/test_SIMDFlags.cpp b/paddle/legacy/utils/tests/test_SIMDFlags.cpp similarity index 94% rename from paddle/utils/tests/test_SIMDFlags.cpp rename to paddle/legacy/utils/tests/test_SIMDFlags.cpp index a808d456a6..6362210acd 100644 --- a/paddle/utils/tests/test_SIMDFlags.cpp +++ b/paddle/legacy/utils/tests/test_SIMDFlags.cpp @@ -11,9 +11,9 @@ limitations under the License. */ #include -#include "paddle/utils/CpuId.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/CpuId.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" using namespace paddle; // NOLINT diff --git a/paddle/utils/tests/test_SpinLock.cpp b/paddle/legacy/utils/tests/test_SpinLock.cpp similarity index 93% rename from paddle/utils/tests/test_SpinLock.cpp rename to paddle/legacy/utils/tests/test_SpinLock.cpp index cc34eb1f86..4cd7836d6a 100644 --- a/paddle/utils/tests/test_SpinLock.cpp +++ b/paddle/legacy/utils/tests/test_SpinLock.cpp @@ -17,9 +17,9 @@ limitations under the License. */ #include #include -#include "paddle/utils/Locks.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" DEFINE_int32(test_thread_num, 100, "testing thread number"); diff --git a/paddle/utils/tests/test_StringUtils.cpp b/paddle/legacy/utils/tests/test_StringUtils.cpp similarity index 95% rename from paddle/utils/tests/test_StringUtils.cpp rename to paddle/legacy/utils/tests/test_StringUtils.cpp index 248f58a7f2..61d2815f09 100644 --- a/paddle/utils/tests/test_StringUtils.cpp +++ b/paddle/legacy/utils/tests/test_StringUtils.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/utils/StringUtil.h" +#include "paddle/legacy/utils/StringUtil.h" #include diff --git a/paddle/utils/tests/test_Thread.cpp b/paddle/legacy/utils/tests/test_Thread.cpp similarity index 98% rename from paddle/utils/tests/test_Thread.cpp rename to paddle/legacy/utils/tests/test_Thread.cpp index 6e2580c491..5e07da3236 100644 --- a/paddle/utils/tests/test_Thread.cpp +++ b/paddle/legacy/utils/tests/test_Thread.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include +#include #include using paddle::AsyncThreadPool; // NOLINT diff --git a/paddle/utils/tests/test_ThreadBarrier.cpp b/paddle/legacy/utils/tests/test_ThreadBarrier.cpp similarity index 94% rename from paddle/utils/tests/test_ThreadBarrier.cpp rename to paddle/legacy/utils/tests/test_ThreadBarrier.cpp index 554b1c1d4a..9c8851ae21 100644 --- a/paddle/utils/tests/test_ThreadBarrier.cpp +++ b/paddle/legacy/utils/tests/test_ThreadBarrier.cpp @@ -18,9 +18,9 @@ limitations under the License. */ #include #include -#include "paddle/utils/Locks.h" -#include "paddle/utils/Logging.h" -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Locks.h" +#include "paddle/legacy/utils/Logging.h" +#include "paddle/legacy/utils/Util.h" DEFINE_int32(test_thread_num, 100, "testing thread number"); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index e8b3053267..d173b41e86 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -22,7 +22,7 @@ function print_usage() { echo -e "\n${RED}Usage${NONE}: ${BOLD}${SCRIPT_NAME}${NONE} [OPTION]" - + echo -e "\n${RED}Options${NONE}: ${BLUE}build${NONE}: run build for x86 platform ${BLUE}build_android${NONE}: run build for android platform @@ -106,6 +106,8 @@ function cmake_gen() { -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_CONTRIB=${WITH_CONTRIB:-ON} + -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} + -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -133,7 +135,8 @@ EOF -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ - -DWITH_ANAKIN=ON + -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ + -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} } function abort(){ @@ -198,7 +201,7 @@ function build_android() { fi ANDROID_STANDALONE_TOOLCHAIN=$ANDROID_TOOLCHAINS_DIR/$ANDROID_ARCH-android-$ANDROID_API - + cat < origin.spec + python ${PADDLE_ROOT}/tools/print_signatures.py paddle.fluid > new.spec + python ${PADDLE_ROOT}/tools/diff_api.py origin.spec new.spec + deactivate +} + + function single_test() { TEST_NAME=$1 if [ -z "${TEST_NAME}" ]; then @@ -331,14 +348,14 @@ EOF function bind_test() { # the number of process to run tests NUM_PROC=6 - + # calculate and set the memory usage for each process MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`) export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE - + # get the CUDA device count CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l) - + for (( i = 0; i < $NUM_PROC; i++ )); do cuda_list=() for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do @@ -547,6 +564,7 @@ function main() { cicheck) cmake_gen ${PYTHON_ABI:-""} build + assert_api_not_changed run_test gen_capi_package gen_fluid_inference_lib diff --git a/paddle/testing/CMakeLists.txt b/paddle/testing/CMakeLists.txt index a1f446817e..2264481899 100644 --- a/paddle/testing/CMakeLists.txt +++ b/paddle/testing/CMakeLists.txt @@ -6,6 +6,6 @@ if(WITH_TESTING) add_library(paddle_test_util STATIC TestUtil.cpp) add_dependencies(paddle_test_util paddle_proto ${external_project_dependencies}) if(NOT MOBILE_INFERENCE) - cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS init memory gtest gflags) + cc_library(paddle_gtest_main SRCS paddle_gtest_main.cc DEPS device_context memory gtest gflags) endif() endif() diff --git a/paddle/testing/TestMain.cpp b/paddle/testing/TestMain.cpp index 3e14532d18..1811dbbd1a 100644 --- a/paddle/testing/TestMain.cpp +++ b/paddle/testing/TestMain.cpp @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/utils/Util.h" +#include "paddle/legacy/utils/Util.h" int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); diff --git a/paddle/testing/TestUtil.cpp b/paddle/testing/TestUtil.cpp index cfb8c713d9..fa8efc20f5 100644 --- a/paddle/testing/TestUtil.cpp +++ b/paddle/testing/TestUtil.cpp @@ -14,7 +14,7 @@ limitations under the License. */ #include "TestUtil.h" #include -#include "paddle/math/SparseMatrix.h" +#include "paddle/legacy/math/SparseMatrix.h" DEFINE_int32(fixed_seq_length, 0, "Produce some sequence of fixed length"); diff --git a/paddle/testing/TestUtil.h b/paddle/testing/TestUtil.h index ec86469aeb..98b864e3c5 100644 --- a/paddle/testing/TestUtil.h +++ b/paddle/testing/TestUtil.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include -#include "paddle/math/Matrix.h" +#include "paddle/legacy/math/Matrix.h" namespace paddle { diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 507479c862..cfea2059c3 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -16,8 +16,8 @@ limitations under the License. */ #include "gflags/gflags.h" #include "gtest/gtest.h" -#include "paddle/fluid/framework/init.h" #include "paddle/fluid/memory/memory.h" +#include "paddle/fluid/platform/init.h" int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); @@ -30,7 +30,9 @@ int main(int argc, char** argv) { new_argv.push_back( strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); #else - new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); + new_argv.push_back(strdup( + "--tryfromenv=use_pinned_memory,use_mkldnn,initial_cpu_memory_in_mb")); + new_argv.push_back(strdup("--undefok=use_mkldnn,initial_cpu_memory_in_mb")); #endif int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); diff --git a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list b/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list deleted file mode 100644 index 0db50f34dd..0000000000 --- a/paddle/trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.list +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/pydata_provider_wrapper_dir/test_pydata_provider_wrapper.data diff --git a/paddle/trainer/tests/sample_filelist.txt b/paddle/trainer/tests/sample_filelist.txt deleted file mode 100644 index 7db4c73535..0000000000 --- a/paddle/trainer/tests/sample_filelist.txt +++ /dev/null @@ -1 +0,0 @@ -trainer/tests/sample_data.txt diff --git a/proto/README.md b/proto/README.md new file mode 100644 index 0000000000..dda7ed7b3c --- /dev/null +++ b/proto/README.md @@ -0,0 +1,3 @@ +## protos in this folder are legacy v2 protos. + +## Please refer to paddle/fluid for latest version. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ea25f3ab35..797c0fbcc4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,4 @@ -file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) +file(GLOB UTILS_PY_FILES . ./paddle/legacy/utils/*.py) file(GLOB_RECURSE FLUID_PY_FILES ./paddle/fluid/*.py) set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} diff --git a/python/paddle/dataset/mnist.py b/python/paddle/dataset/mnist.py index 6a1b8b5fac..9d05aeeb95 100644 --- a/python/paddle/dataset/mnist.py +++ b/python/paddle/dataset/mnist.py @@ -111,7 +111,7 @@ def fetch(): paddle.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5) def convert(path): diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index bd985ad733..3034c1a087 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -44,7 +44,7 @@ import metrics import transpiler from param_attr import ParamAttr, WeightNormParamAttr from data_feeder import DataFeeder -from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace +from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope from transpiler import DistributeTranspiler, InferenceTranspiler, \ memory_optimize, release_memory from concurrency import (Go, make_channel, channel_send, channel_recv, @@ -83,6 +83,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \ 'profiler', 'unique_name', 'recordio_writer', + 'Scope', ] @@ -117,7 +118,8 @@ def __bootstrap__(): read_env_flags = [ 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', - 'eager_delete_scope', 'use_mkldnn' + 'eager_delete_scope', 'use_mkldnn', 'initial_cpu_memory_in_mb', + 'init_allocated_mem' ] if core.is_compiled_with_cuda(): read_env_flags += [ diff --git a/python/paddle/fluid/average.py b/python/paddle/fluid/average.py index 6abe8233b0..358e24df31 100644 --- a/python/paddle/fluid/average.py +++ b/python/paddle/fluid/average.py @@ -36,6 +36,25 @@ def _is_number_or_matrix_(var): class WeightedAverage(object): + """ + Calculate weighted average. + + The average calculating is accomplished via Python totally. + They do not change Paddle's Program, nor do anything to + modify NN model's configuration. They are completely + wrappers of Python functions. + + Examples: + .. code-block:: python + avg = fluid.average.WeightedAverage() + avg.add(value=2.0, weight=1) + avg.add(value=4.0, weight=2) + avg.eval() + + # The result is 3.333333333. + # For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333 + """ + def __init__(self): warnings.warn( "The %s is deprecated, please use fluid.metrics.Accuracy instead." % diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 4f9622d04d..4faa063031 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs): for idx, op_desc in enumerate(op_descs): for var_name in op_desc.input_arg_names(): if len(renamed_vars[var_name]) > 1: - pending_sum_ops.append( - (_create_op_desc_("sum", {"X": renamed_vars[var_name]}, - {"Out": [var_name]}, {}), idx)) + pending_sum_ops.append((_create_op_desc_( + "sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]}, + {"use_mkldnn": False}), idx)) renamed_vars[var_name] = [var_name] for var_name in op_desc.output_arg_names(): if var_name == core.empty_var_name( @@ -147,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs): else: if len(renamed_vars[var_name]) == 1: new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 # rename original var_name renamed_vars[var_name][0] = new_name @@ -155,14 +155,15 @@ def _addup_repetitive_outputs_(op_descs): _rename_arg_(pending_sum_ops, var_name, new_name) new_name = var_name + "@RENAME@" + \ - str(var_rename_count[var_name]) + str(var_rename_count[var_name]) var_rename_count[var_name] += 1 op_desc.rename_output(var_name, new_name) renamed_vars[var_name].append(new_name) for var_name, inputs in renamed_vars.iteritems(): if len(inputs) > 1: - pending_sum_ops.append((_create_op_desc_( - "sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs))) + pending_sum_ops.append( + (_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]}, + {"use_mkldnn": False}), len(op_descs))) # sum_op descs are sorted according to their insert position for p in reversed(pending_sum_ops): op_descs.insert(p[1], p[0]) @@ -434,18 +435,65 @@ def _get_stop_gradients_(program): def append_backward(loss, parameter_list=None, no_grad_set=None, callbacks=None): """ - Append backward part to main_program + Append backward part to main_program. - Args: - loss(Variable): The variable generated by cost function. - parameter_list(list[string]): Parameters that need to be updated by - optimizer. If None, it means all parameters need to be updated. - no_grad_set(set): Variables that have no gradients in Block 0. - All variables with `step_gradient=True` from all blocks will be - automatically added. + A complete neural network training is made up of forward and backward + propagation. However, when we configure a network, we only need to + specify its forwrd part. The backward part is generated automatically + according to the forward part by this function. - Return: - (list[(Variable,Variable)]): list of (parameter, gradient) pair. + In most cases, users do not need to invoke this function manually. It + will be automatically invoked by the optimizer's `minimize` function. + + Args: + loss(Variable): The loss variable of the network. + parameter_list(list[string]|None): Names of parameters that need + to be updated by optimizers. + If it is None, all parameters + will be updated. + Default: None + no_grad_set(set|None): Variables in the Block 0 whose gradients + should be ignored. All variables with + `step_gradient=True` from all blocks will + be automatically added into this set. + Default: None + callbacks(list[callable object]|None): The callbacks are used for + doing some custom jobs during + backward part building. All + callable objects in it will + be invoked once each time a + new gradient operator is added + into the program. The callable + object must has two input + parameters: 'block' and 'context'. + The 'block' is the block which + the new gradient operator will + be added to. The 'context' is a + map, whose keys are gradient + variable names and values are + corresponding original variables. + In addition to this, the 'context' + has another special key-value pair: + the key is string '__current_op_desc__' + and the value is the op_desc of the + gradient operator who has just + triggered the callable object. + + Returns: + list[(Variable,Variable)]: Pairs of parameter and its + corresponding gradients. The key is the parameter and the + value is gradient variable. + + Raises: + AssertionError: If `loss` is not an instance of Variable. + + Examples: + .. code-block:: python + + # network configuration code + # ... + avg_loss = fluid.layers.mean(loss) + param_grad_list = fluid.backward.append_backward(loss=avg_loss) """ assert isinstance(loss, framework.Variable) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 66c3fc6b66..18e2f3045e 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -24,8 +24,6 @@ __all__ = [ 'GradientClipByValue', 'GradientClipByNorm', 'GradientClipByGlobalNorm', - 'append_gradient_clip_ops', - 'error_clip_callback', ] @@ -38,6 +36,25 @@ class BaseErrorClipAttr(object): class ErrorClipByValue(BaseErrorClipAttr): + """ + Clips tensor values to the range [min, max]. + + Given a tensor t, this operation clips its value to min and max inplace. + + - Any values less than min are set to min. + - Any values greater than max are set to max. + + Args: + max (float): The maximum value to clip by. + min (float, optional): The minimum value to clip by. if not set by user, \ + will be set to -max by framework. + + Examples: + .. code-block:: python + + var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...) + """ + def __init__(self, max, min=None): max = float(max) if min is None: @@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr): class GradientClipByValue(BaseGradientClipAttr): + """ + Clips gradient values to the range [min, max]. + + Given a tensor t, this operation clips its value to min and max inplace. + + - Any values less than min are set to min. + - Any values greater than max are set to max. + + Args: + max (float): The maximum value to clip by. + min (float, optional): The minimum value to clip by. if not set by user, \ + will be set to -max by framework. + + Examples: + .. code-block:: python + + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByValue(-1.0, 1.0)) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + """ + def __init__(self, max, min=None): max = float(max) if min is None: @@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr): class GradientClipByNorm(BaseGradientClipAttr): + """ + Clips tensor values to a maximum L2-norm. + + This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`. + If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out` + will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than + :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of + :math:`Out` equal to :math:`max\_norm`, as shown in the following formula: + + .. math:: + + Out = \\frac{max\_norm * X}{norm(X)}, + + where :math:`norm(X)` represents the L2 norm of :math:`X`. + + Args: + clip_norm (float): The maximum norm value + + Examples: + .. code-block:: python + + w_param_attrs = ParamAttr(name=None, + initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + learning_rate=1.0, + regularizer=L1Decay(1.0), + trainable=True, + clip=GradientClipByNorm(clip_norm=2.0)) + y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) + + """ + def __init__(self, clip_norm): self.clip_norm = clip_norm @@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr): class GradientClipByGlobalNorm(BaseGradientClipAttr): + """ + Clips values of multiple tensors by the ratio of the sum of their norms. + + Given a list of tensors t_list, and a clipping ratio clip_norm, this + operation returns a list of clipped tensors list_clipped and the global + norm (global_norm) of all tensors in t_list. + + To perform the clipping, the values :math:`t\_list[i]` are set to: + + .. math:: + + t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)} + + where: + + .. math:: + + global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2} + + If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are, + otherwise they're all shrunk by the global ratio. + + Args: + clip_norm (float): The maximum norm value + group_name (str, optional): The group name for this clip. + + Examples: + .. code-block:: python + + p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip) + + with fluid.program_guard(main_program=prog_clip): + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0)) + p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip) + + """ + def __init__(self, clip_norm, group_name="default_group"): if not isinstance(group_name, basestring): raise TypeError("'group_name' must be a basestring.") @@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): def set_gradient_clip(clip, param_list=None, program=None): """ - To specify parameters that require gradient clip. - Args: - clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, - which describes the type and detailed attributes of required gradient clip. - param_list(list, None by default): Parameters that require gradient clip. - It can be a list of parameter or a list of parameter's name. - When it's None, all parameters in the program will be included. - program(Program, None by default): The program where parameters are. - Will be the default main program when assigned with None. + To specify parameters that require gradient clip. + + Args: + clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, + which describes the type and detailed attributes of required gradient clip. + param_list(list(Variable)): Parameters that require gradient clip. + It can be a list of parameter or a list of parameter's name. + When it's None, all parameters in the program will be included. + program(Program): The program where parameters are. + Will be the default main program when assigned with None. """ if not isinstance(clip, BaseGradientClipAttr): raise TypeError( diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index e2013137b1..c859778b37 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -29,6 +29,13 @@ class DataToLoDTensorConverter(object): self.place = place self.lod_level = lod_level self.shape = shape + negtive_count = 0 + for s in self.shape: + if s < 0: + negtive_count += 1 + if negtive_count > 1: + self.shape = None + break if dtype == core.VarDesc.VarType.FP32: self.dtype = 'float32' elif dtype == core.VarDesc.VarType.INT64: @@ -47,7 +54,7 @@ class DataToLoDTensorConverter(object): self.lod = [] for i in six.range(lod_level): - self.lod.append([0]) + self.lod.append([]) def feed(self, data): self._feed_impl_(data, self.lod, self.lod_level) @@ -56,21 +63,77 @@ class DataToLoDTensorConverter(object): if lod_level == 0: self.data.append(data) else: - cur_lod_len = len(data) - lod[0].append(lod[0][-1] + cur_lod_len) + lod[0].append(len(data)) for each_data in data: self._feed_impl_(each_data, lod[1:], lod_level - 1) def done(self): - arr = numpy.array(self.data, dtype=self.dtype).reshape(self.shape) + arr = numpy.array(self.data, dtype=self.dtype) + if self.shape: + arr = arr.reshape(self.shape) t = core.LoDTensor() t.set(arr, self.place) if self.lod_level > 0: - t.set_lod(self.lod) + t.set_recursive_sequence_lengths(self.lod) return t class DataFeeder(object): + """ + DataFeeder converts the data that returned by a reader into a data + structure that can feed into Executor and ParallelExecutor. The reader + usually returns a list of mini-batch data entries. Each data entry in + the list is one sample. Each sample is a list or a tuple with one + feature or multiple features. + + The simple usage shows below: + + .. code-block:: python + + place = fluid.CPUPlace() + img = fluid.layers.data(name='image', shape=[1, 28, 28]) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) + result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) + + + If you want to feed data into GPU side separately in advance when you + use multi-GPU to train a model, you can use `decorate_reader` function. + + .. code-block:: python + + place=fluid.CUDAPlace(0) + feeder = fluid.DataFeeder(place=place, feed_list=[data, label]) + reader = feeder.decorate_reader( + paddle.batch(flowers.train(), batch_size=16)) + + Args: + feed_list(list): The Variables or Variables'name that will + feed into model. + place(Place): place indicates feed data into CPU or GPU, if you want to + feed data into GPU, please using `fluid.CUDAPlace(i)` (`i` represents + the GPU id), or if you want to feed data into CPU, please using + `fluid.CPUPlace()`. + program(Program): The Program that will feed data into, if program + is None, it will use default_main_program(). Default None. + + Raises: + ValueError: If some Variable is not in this Program. + + Examples: + .. code-block:: python + + # ... + place = fluid.CPUPlace() + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_vars_name + ] # feed_vars_name is a list of variables' name. + feeder = fluid.DataFeeder(feed_list, place) + for data in reader(): + outs = exe.run(program=main_program, + feed=feeder.feed(data)) + """ + def __init__(self, feed_list, place, program=None): self.feed_dtypes = [] self.feed_names = [] @@ -100,6 +163,16 @@ class DataFeeder(object): self.place = place def feed(self, iterable): + """ + According to feed_list and iterable, converters the input into + a data structure that can feed into Executor and ParallelExecutor. + + Args: + iterable(list|tuple): the input data. + + Returns: + dict: the result of conversion. + """ converter = [] for lod_level, shape, dtype in six.zip( self.feed_lod_level, self.feed_shapes, self.feed_dtypes): @@ -122,6 +195,20 @@ class DataFeeder(object): return ret_dict def feed_parallel(self, iterable, num_places=None): + """ + Takes multiple mini-batches. Each mini-batch will be feed on each + device in advance. + + Args: + iterable(list|tuple): the input data. + num_places(int): the number of devices. Default None. + + Returns: + dict: the result of conversion. + + Notes: + The number of devices and number of mini-batches must be same. + """ if isinstance(self.place, core.CUDAPlace): places = [ core.CUDAPlace(i) @@ -160,6 +247,24 @@ class DataFeeder(object): multi_devices, num_places=None, drop_last=True): + """ + Converter the input data into a data that returned by reader into + multiple mini-batches. Each mini-batch will be feed on each device. + + Args: + reader(fun): the input data. + multi_devices(bool): the number of places. Default None. + num_places(int): the number of places. Default None. + drop_last(bool): the number of places. Default None. + + Returns: + dict: the result of conversion. + + Raises: + ValueError: If drop_last is False and the data batch which cannot + fit for devices. + """ + def __reader_creator__(): if not multi_devices: for item in reader(): diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 7c6ad6f27d..00ba1a0457 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -41,7 +41,12 @@ def _clone_var_(block, var): class Evaluator(object): """ - Base Class for all evaluators + Warning: better to use the fluid.metrics.* things, more + flexible support via pure Python and Operator, and decoupled + with executor. Short doc are intended to urge new user + start from Metrics. + + Base Class for all evaluators. Args: name(str): The name of evaluator. such as, "accuracy". Used for generate @@ -69,6 +74,10 @@ class Evaluator(object): def reset(self, executor, reset_program=None): """ reset metric states at the begin of each pass/user specified batch + + Args: + executor(Executor|ParallelExecutor): a executor for executing the reset_program + reset_program(Program): a single Program for reset process """ if reset_program is None: reset_program = Program() @@ -85,15 +94,16 @@ class Evaluator(object): def eval(self, executor, eval_program=None): """ Evaluate the statistics merged by multiple mini-batches. + Args: + executor(Executor|ParallelExecutor): a executor for executing the eval_program + eval_program(Program): a single Program for eval process """ raise NotImplementedError() - def create_state(self, suffix, dtype, shape): + def _create_state(self, suffix, dtype, shape): """ Create state variable. - NOTE: It is not a public API. - Args: suffix(str): the state suffix. dtype(str|core.VarDesc.VarType): the state data type @@ -113,9 +123,35 @@ class Evaluator(object): class ChunkEvaluator(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.ChunkEvaluator + instead. + Accumulate counter numbers output by chunk_eval from mini-batches and compute the precision recall and F1-score using the accumulated counter numbers. + For some basics of chunking, please refer to + 'Chunking with Support Vector Machines '. + + Args: + input (Variable): prediction output of the network. + label (Variable): label of the test data set. + chunk_scheme (str): can be IOB/IOE/IOBES and IO. See the chunk_eval op for details. + num_chunk_types (int): the number of chunk type. + excluded_chunk_types (list): A list including chunk type ids, indicating chunk types that are not counted. + + Returns: + tuple: tuple containing: precision, recall, f1_score + + Examples: + .. code-block:: python + + exe = fluid.executor(place) + evaluator = fluid.Evaluator.ChunkEvaluator(input, label) + for epoch in PASS_NUM: + evaluator.reset(exe) + for data in batches: + loss = exe.run(fetch_list=[cost]) + distance, instance_error = distance_evaluator.eval(exe) """ def __init__( @@ -130,11 +166,11 @@ class ChunkEvaluator(Evaluator): if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.num_infer_chunks = self.create_state( + self.num_infer_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_infer_chunks') - self.num_label_chunks = self.create_state( + self.num_label_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_label_chunks') - self.num_correct_chunks = self.create_state( + self.num_correct_chunks = self._create_state( dtype='int64', shape=[1], suffix='num_correct_chunks') precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval( input=input, @@ -178,6 +214,8 @@ class ChunkEvaluator(Evaluator): class EditDistance(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.EditDistance + instead. Accumulate edit distance sum and sequence number from mini-batches and compute the average edit_distance and instance error of all batches. @@ -188,15 +226,16 @@ class EditDistance(Evaluator): ignored_tokens(list of int): Tokens that should be removed before calculating edit distance. - Example: + Examples: + .. code-block:: python - exe = fluid.executor(place) - distance_evaluator = fluid.Evaluator.EditDistance(input, label) - for epoch in PASS_NUM: - distance_evaluator.reset(exe) - for data in batches: - loss = exe.run(fetch_list=[cost]) - distance, instance_error = distance_evaluator.eval(exe) + exe = fluid.executor(place) + distance_evaluator = fluid.Evaluator.EditDistance(input, label) + for epoch in PASS_NUM: + distance_evaluator.reset(exe) + for data in batches: + loss = exe.run(fetch_list=[cost]) + distance, instance_error = distance_evaluator.eval(exe) In the above example: 'distance' is the average of the edit distance in a pass. @@ -210,11 +249,11 @@ class EditDistance(Evaluator): if main_program.current_block().idx != 0: raise ValueError("You can only invoke Evaluator in root block") - self.total_distance = self.create_state( + self.total_distance = self._create_state( dtype='float32', shape=[1], suffix='total_distance') - self.seq_num = self.create_state( + self.seq_num = self._create_state( dtype='int64', shape=[1], suffix='seq_num') - self.instance_error = self.create_state( + self.instance_error = self._create_state( dtype='int64', shape=[1], suffix='instance_error') distances, seq_num = layers.edit_distance( input=input, label=label, ignored_tokens=ignored_tokens) @@ -256,9 +295,10 @@ class EditDistance(Evaluator): class DetectionMAP(Evaluator): """ + Warning: This would be deprecated in the future. Please use fluid.metrics.DetectionMAP + instead. Calculate the detection mean average precision (mAP). - TODO (Dang Qingqing): update the following doc. The general steps are as follows: 1. calculate the true positive and false positive according to the input of detection and labels. @@ -293,17 +333,18 @@ class DetectionMAP(Evaluator): - 11point: the 11-point interpolated average precision. - integral: the natural integral of the precision-recall curve. - Example: + Examples: + .. code-block:: python - exe = fluid.executor(place) - map_evaluator = fluid.Evaluator.DetectionMAP(input, - gt_label, gt_box, gt_difficult) - cur_map, accum_map = map_evaluator.get_map_var() - fetch = [cost, cur_map, accum_map] - for epoch in PASS_NUM: - map_evaluator.reset(exe) - for data in batches: - loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch) + exe = fluid.executor(place) + map_evaluator = fluid.Evaluator.DetectionMAP(input, + gt_label, gt_box, gt_difficult) + cur_map, accum_map = map_evaluator.get_map_var() + fetch = [cost, cur_map, accum_map] + for epoch in PASS_NUM: + map_evaluator.reset(exe) + for data in batches: + loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch) In the above example: @@ -340,9 +381,10 @@ class DetectionMAP(Evaluator): evaluate_difficult=evaluate_difficult, ap_version=ap_version) - self.create_state(dtype='int32', shape=None, suffix='accum_pos_count') - self.create_state(dtype='float32', shape=None, suffix='accum_true_pos') - self.create_state(dtype='float32', shape=None, suffix='accum_false_pos') + self._create_state(dtype='int32', shape=None, suffix='accum_pos_count') + self._create_state(dtype='float32', shape=None, suffix='accum_true_pos') + self._create_state( + dtype='float32', shape=None, suffix='accum_false_pos') self.has_state = None var = self.helper.create_variable( diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 33d8f70941..b436dfe70a 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -18,17 +18,24 @@ from framework import Program, default_main_program, Variable from . import core __all__ = [ - 'Executor', 'global_scope', 'scope_guard', 'switch_scope', 'fetch_var' + 'Executor', 'global_scope', 'scope_guard', '_switch_scope', 'fetch_var' ] g_scope = core.Scope() def global_scope(): + """ + Get the global/default scope instance. There are a lot of APIs use + :code:`global_scope` as its default value, e.g., :code:`Executor.run` + + Returns: + Scope: The global/default scope instance. + """ return g_scope -def switch_scope(scope): +def _switch_scope(scope): global g_scope ex = g_scope g_scope = scope @@ -37,12 +44,42 @@ def switch_scope(scope): @contextlib.contextmanager def scope_guard(scope): - ex = switch_scope(scope) + """ + Change the global/default scope instance by Python `with` statement. All + variable in runtime will assigned to the new scope. + + Examples: + >>> import paddle.fluid as fluid + >>> new_scope = fluid.Scope() + >>> with fluid.scope_guard(new_scope): + >>> ... + + Args: + scope: The new global/default scope. + """ + ex = _switch_scope(scope) yield - switch_scope(ex) + _switch_scope(ex) def as_numpy(tensor): + """ + Convert a Tensor to a numpy.ndarray, its only support Tensor without LoD information. + For higher dimensional sequence data, please use LoDTensor directly. + Examples: + >>> import paddle.fluid as fluid + >>> outs = executor.run(...) + >>> np_outs = map(lambda x: as_numpy(x), outs) + >>> ... + + Args: + tensor(Variable): a instance of Tensor + + Returns: + numpy.ndarray + """ + if isinstance(tensor, core.LoDTensorArray): + return [as_numpy(t) for t in tensor] if isinstance(tensor, list): return [as_numpy(t) for t in tensor] assert isinstance(tensor, core.LoDTensor) @@ -135,14 +172,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name): def fetch_var(name, scope=None, return_numpy=True): """ - Fetch the value of the variable with the given name from the given scope + Fetch the value of the variable with the given name from the + given scope. + Args: name(str): name of the variable. Typically, only persistable variables can be found in the scope used for running the program. scope(core.Scope|None): scope object. It should be the scope where you pass to Executor.run() when running your program. - If None, global_scope() will be used. - return_numpy(bool): whether convert the tensor to numpy.ndarray + If None, global_scope() will be used. Default None. + return_numpy(bool): whether convert the tensor to numpy.ndarray. + Default True. + Returns: LodTensor|numpy.ndarray """ @@ -162,7 +203,7 @@ def fetch_var(name, scope=None, return_numpy=True): return tensor -def get_program_cache_key(feed, fetch_list): +def _get_program_cache_key(feed, fetch_list): feed_var_names = feed.keys() def to_name_str(var): @@ -181,6 +222,25 @@ def get_program_cache_key(feed, fetch_list): class Executor(object): + """ + An Executor in Python, only support the single-GPU running. For multi-cards, please refer to + ParallelExecutor. + Python executor takes a program, add feed operators and fetch operators to this program according + to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides + the variables(or names) that user want to get after program run. Note: the executor will run all + operators in the program but not only the operators dependent by the fetch_list. + It store the global variables into the global scope, and create a local scope for the temporary + variables. The local scope contents will be discarded after every minibatch forward/backward finished. + But the global scope variables will be persistent through different runs. + All of ops in program will be running in sequence. + + Args: + place(core.CPUPlace|core.CUDAPlace(n)): indicate the executor run on which device + + Note: For debugging complicated network in parallel-GPUs, you can test it on the executor. + They has the exactly same arguments, and expected the same results. + """ + def __init__(self, place): self.place = place p = core.Place() @@ -189,6 +249,23 @@ class Executor(object): self.program_caches = dict() def as_lodtensor(self, data): + """ + Convert numpy.ndarray to Tensor, its only support Tensor without LoD information. + For higher dimensional sequence data, please use LoDTensor directly. + + Examples: + >>> import paddle.fluid as fluid + >>> exe = fluid.executor(fluid.CPUPlace()) + >>> data = np.array(size=(100, 200, 300)) + >>> np_outs = map(lambda x: exe.as_lodtensor(x), data) + >>> ... + + Args: + data(numpy.ndarray): a instance of array + + Returns: + LoDTensor + """ if isinstance(data, list): raise RuntimeError("Some of your feed data hold LoD information. \ They can not be completely cast from a list of Python \ @@ -271,6 +348,12 @@ class Executor(object): ] return outs + def begin_pass(self): + self.executor.begin_pass() + + def end_pass(self): + self.executor.end_pass() + def run(self, program=None, feed=None, @@ -280,23 +363,47 @@ class Executor(object): scope=None, return_numpy=True, use_program_cache=False): - """ Run program by this Executor. Feed data by feed map, fetch result by fetch_list. - + """ + Run program by this Executor. Feed data by feed map, fetch result by fetch_list. Python executor takes a program, add feed operators and fetch operators to this program according to feed map and fetch_list. Feed map provides input data for the program. fetch_list provides - the variables(or names) that user want to get after program run. Note: the executor will run all + the variables(or names) that user want to get after program run. + + Note: the executor will run all operators in the program but not only the operators dependent by the fetch_list - :param program: the program that need to run, if not provied, then default_main_program will be used. - :param feed: feed variable map, e.g. {"image": ImageData, "label": LableData} - :param fetch_list: a list of variable or variable names that user want to get, run will return them according - to this list. - :param feed_var_name: the name for the input variable of feed Operator. - :param fetch_var_name: the name for the output variable of feed Operator. - :param scope: the scope used to run this program, you can switch it to different scope. default is global_scope - :param return_numpy: if convert the fetched tensor to numpy - :param use_program_cache: set use_program_cache to true if program not changed compare to the last step. - :return: result according to fetch_list. + Args: + program(Program): the program that need to run, if not provied, then default_main_program will be used. + feed(dict): feed variable map, e.g. {"image": ImageData, "label": LableData} + fetch_list(list): a list of variable or variable names that user want to get, run will return them according to this list. + feed_var_name(str): the name for the input variable of feed Operator. + fetch_var_name(str): the name for the output variable of fetch Operator. + scope(Scope): the scope used to run this program, you can switch it to different scope. default is global_scope + return_numpy(bool): if convert the fetched tensor to numpy + use_program_cache(bool): set use_program_cache to true if program not changed compare to the last step. + + Returns: + + list(numpy.array): fetch result according to fetch_list. + + + Examples: + + >>> data = layers.data(name='X', shape=[1], dtype='float32') + >>> hidden = layers.fc(input=data, size=10) + >>> layers.assign(hidden, out) + >>> loss = layers.mean(out) + >>> adam = fluid.optimizer.Adam() + >>> adam.minimize(loss) + + >>> cpu = core.CPUPlace() + >>> exe = Executor(cpu) + >>> exe.run(default_startup_program()) + + >>> x = numpy.random.random(size=(10, 1)).astype('float32') + >>> outs = exe.run( + >>> feed={'X': x}, + >>> fetch_list=[loss.name]) """ if feed is None: feed = {} @@ -317,7 +424,7 @@ class Executor(object): if scope is None: scope = global_scope() - cache_key = get_program_cache_key(feed, fetch_list) + cache_key = _get_program_cache_key(feed, fetch_list) if use_program_cache: cached_program = self._get_program_cache(cache_key) if cached_program is None: diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index f6438c82ac..ea3117e02b 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -19,7 +19,16 @@ import re import numpy as np import proto.framework_pb2 as framework_pb2 -from . import core +try: + from . import core +except ImportError, e: + raise ImportError( + """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\" + if you encounters \"libmkldnn.so not found\" errors. If you have python + installed in other directory, replace \"/usr/local/lib\" with your own + directory. The original error is: \n""" + e.message) +except Exception, e: + raise e import unique_name __all__ = [ @@ -27,11 +36,10 @@ __all__ = [ 'Variable', 'Program', 'Operator', + 'Parameter', 'default_startup_program', 'default_main_program', 'program_guard', - 'switch_startup_program', - 'switch_main_program', 'get_var', ] @@ -43,7 +51,8 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix() def grad_var_name(var_name): """ - return gradient name for a certain var name + Returns: + str: gradient name for a certain var name """ return var_name + GRAD_VAR_SUFFIX @@ -51,10 +60,12 @@ def grad_var_name(var_name): def convert_np_dtype_to_dtype_(np_dtype): """ Convert the data type in numpy to the data type in Paddle + Args: - np_dtype(np.dtype): the data type in numpy + np_dtype(np.dtype): the data type in numpy. - Returns(core.VarDesc.VarType): the data type in Paddle + Returns: + core.VarDesc.VarType: the data type in Paddle. """ dtype = np.dtype(np_dtype) @@ -120,37 +131,53 @@ def _debug_string_(proto, throw_on_error=True): class Variable(object): """ - Python variable. Every input and output of an operator is a variable. Every - variable belongs to a block. The variable has a name and two variables in - different blocks could have the same name. + In Fluid, every input and output of an operator is a variable. In most + cases, variables are used for holding different kinds of data or training + labels. A variable belongs to a block. All variable has its own name and + two variables in different blocks could have the same name. - There are many kinds of variables. Please reference the framework.proto for - details. + There are many kinds of variables. Each kind of them has its own attributes + and usages. Please reference the framework.proto for details. - Notes: The constructor of Variable should not be invoked directly. Please - use `Block.create_var` to create a variable. - - >>> cur_program = Program() - >>> cur_block = cur_program.current_block() - >>> new_variable = cur_block.create_var( - >>> name="X", shape=[-1, 23, 48], dtype='float32') + Most of a Variable's member variables can be setted to be None. It mean + it is not available or will be specified later. Args: - block(Block): The associated block. It will be passed by - `Block.create_var` automatically. + block(Block): The block that the variable belongs to. type(core.VarDesc.VarType): Variable type. Please reference the framework.proto for details. - shape(tuple|list|None): The shape of variable. -1 means the batch size. + name(str|None): The name of the variable. If setted None, it will be + generated automatically. Default: None + shape(tuple|list|None): The shape of the variable. -1 means the batch size. Some kinds of variable do not contain shape, just set it to None. - dtype(np.dtype|core.VarDesc.VarType|str): The data type of variable. - lod_level(int): The level of lod tensor. 0 means it is not a time + Default: None + dtype(np.dtype|core.VarDesc.VarType|str|None): The data type of variable. + Default: None + lod_level (int|None): The level of lod tensor. 0 means it is not a time series data. - capacity(int): The capacity of Channel variable. Ignored - for other types. - persistable(bool): True if the variable should be saved as check point. - Defaults to False. - stop_gradient(bool): True if the variable will stop to calculate - gradients when backward. Defaults to False. + Default: None + capacity (int|None): The capacity of Channel variable. Ignored for other + types. Default: None + persistable (bool|None): True if the variable is persistable. A persistable + variable will not be deleted after an iteration ending. Defaults: None. + error_clip (BaseErrorClipAttr|None): The error clip attributes of the + corresponding gradient variable. Default: None + stop_gradient (bool): True if the variable will stop to calculate its + gradients when backward. Default: False. + is_data (bool): True if the variable is an input data. Default: False + + Notes: + The constructor of Variable should not be invoked directly. Please + use `Block.create_var` to create a variable. + + Examples: + .. code-block:: python + + cur_program = Program() + cur_block = cur_program.current_block() + new_variable = cur_block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') """ def __init__(self, @@ -253,13 +280,14 @@ class Variable(object): Get debug string. Args: - throw_on_error(bool): True if raise an exception when self is not - intialized. + throw_on_error(bool): True if raise an exception when self is + not initialized. with_details(bool): more details about variables and parameters - (e.g. trainable, optimize_attr, ...) will be printed when with_details is True - - Returns(str): The debug string. + (e.g. trainable, optimize_attr, ...) will be printed when + with_details is True. Default False; + Returns: + str: The debug string. """ assert isinstance(throw_on_error, bool) and isinstance(with_details, bool) @@ -276,6 +304,15 @@ class Variable(object): __repr__ = __str__ def set_desc(self, input): + """ + Set the variable description. + + Args: + input(core.VarDesc): The new VarDesc. + + Returns: + None + """ self.desc = input @property @@ -312,6 +349,15 @@ class Variable(object): return self.desc.type() def set_error_clip(self, error_clip): + """ + Set the error_clip. + + Args: + error_clip(BaseErrorClipAttr) : The new error_clip. + + Returns: + None + """ self.error_clip = error_clip @@ -319,8 +365,8 @@ def get_all_op_protos(): """ Get all registered op proto from PaddlePaddle C++ end. - Returns(list): list of OpProto - + Returns: + list: list of OpProto. """ protostrs = core.get_all_op_protos() ret_values = [] @@ -373,16 +419,52 @@ class OpProtoHolder(object): class Operator(object): """ - Python Operator class. The operator represents the build in instructions in a - Block. Users can use the build in instructions to describe their neural - network. + In Fluid, all the operation are represented by Operator, and Operator + is regarded as a build in an instruction of a Block. Users can use the + build in instructions to describe their neural network. + + Args: + block(Block): The block has the current operator. + desc(core.OpDesc): The protobuf description of Operator. + type(str): The type of operator. Default None. + inputs(dict): The input of this Operator. it is a dictionary, for every + element, key is the input parameter name, and value is a list of + variables. Default None. + outputs(dict): The output of this Operator. it is a dictionary, for + every element, key is the input parameter name, and value is a list + of variables. Default None. + attrs(dict): The attributes of this Operator. it is a dictionary, for + every element, key is attribute name, and value is the attribute value. + The attribute type should be as same as the type registered in C++ side. + Default None. + + Returns: + Operator: The initialized Operator. + + Raises: + ValueError: If the passed input, output and attrs doesn't match the + initializing Operator's that registered in C++ side. + + Notes: + The constructor of operator should not be invoked directly. Use + Block.append_op or Block.prepend_op instead. + + Examples: + .. code-block:: python + + cur_program = Program() + cur_block = cur_program.current_block() + # var1 += var2 + var3 + cur_block.append_op(type="sum", + inputs={"X": [var1, var2, var3]}, + outputs={"Out": [var1]}) """ OP_WITHOUT_KERNEL_SET = { 'feed', 'fetch', 'save', 'load', 'recurrent', 'go', 'rnn_memory_helper_grad', 'conditional_block', 'while', 'send', 'recv', 'listen_and_serv', 'parallel_do', 'save_combine', 'load_combine', 'ncclInit', 'channel_create', 'channel_close', 'channel_send', - 'channel_recv', 'select', 'gen_nccl_id' + 'channel_recv', 'select', 'checkpoint_notify', 'gen_nccl_id' } def __init__(self, @@ -392,31 +474,7 @@ class Operator(object): inputs=None, outputs=None, attrs=None): - """ - Constructor. - - Notes: The constructor of operator should not be invoked directly. Use - Block.append_op or Block.prepend_op instead. - >>> cur_program = Program() - >>> cur_block = cur_program.current_block() - >>> # var1 += var2 + var3 - >>> cur_block.append_op(type="sum", - >>> inputs={"X": [var1, var2, var3]}, - >>> outputs={"Out": [var1]}) - - Args: - block(Block): The block has the current operator. - desc(core.OpDesc): The protobuf description. - type(str): The type of operator. - inputs(dict): The input dictionary. Key is the input parameter name. - Value is a list of variables. - outputs(dict): The output dictionary which has the same format with - inputs. - attrs(dict): The attributes dictionary. Key is attribute name. Value - is the attribute value. The attribute type should be as same as - the type registered in C++ - """ self.block = block self.desc = desc self.attrs = attrs @@ -510,15 +568,9 @@ class Operator(object): if (attr_name not in self.attrs) or ( self.attrs[attr_name] is None): continue - if isinstance(self.attrs[attr_name], Block): - self.desc.set_block_attr(attr_name, - self.attrs[attr_name].desc) - elif isinstance(self.attrs[attr_name], core.BlockDesc) or \ - isinstance(self.attrs[attr_name], core.ProgramDesc): - self.desc.set_serialized_attr( - attr_name, self.attrs[attr_name].serialize_to_string()) - else: - self.desc.set_attr(attr_name, self.attrs[attr_name]) + attr_val = self.attrs[attr_name] + self._update_desc_attr(attr_name, attr_val) + self.desc.check_attrs() if self.has_kernel(type): self.desc.infer_var_type(self.block.desc) @@ -529,12 +581,14 @@ class Operator(object): def to_string(self, throw_on_error): """ - To debug string. + Get debug string. + Args: - throw_on_error(bool): raise exception when self is not initialized - when throw_on_error is True + throw_on_error(bool): Whether to raise exception if self is not + initialized. - Returns(str): The debug string. + Returns: + str: The debug string. """ protostr = self.desc.serialize_to_string() @@ -552,29 +606,45 @@ class Operator(object): def input(self, name): """ - Get input arguments by the input parameter name - Args: - name(str): The input parameter name + Get the input arguments according to the input parameter name. - Returns(list): return the list of argument names associated with the - specific parameter name. + Args: + name(str): The input parameter name. + Returns: + list: return the list of argument names that associated with \ + the specific parameter name. """ return self.desc.input(name) def rename_input(self, old_name, new_name): + """ + Rename the `old_name` to `new_name`. + + Args: + old_name(str): The old name of the Operator's input. + new_name(str): The new name of the Operator's input. + + Returns: + None + """ self.desc.rename_input(old_name, new_name) def rename_output(self, old_name, new_name): + """ + Rename the `old_name` to `new_name`. + + Args: + old_name(str): The old name of the Operator's output. + new_name(str): The new name of the Operator's output. + + Returns: + None + """ self.desc.rename_output(old_name, new_name) @property def input_names(self): - """ - Get all input parameter names - Returns(list): return a list of input parameter names - - """ return self.desc.input_names() @property @@ -587,33 +657,23 @@ class Operator(object): def output(self, name): """ - Get output arguments by the output parameter name - Args: - name(str): The output parameter name + Get output arguments by the output parameter name. - Returns(list): return the list of argument names associated with the - specific parameter name. + Args: + name(str): The output parameter name. + Returns: + list: return the list of argument names associated with \ + the specific parameter name. """ return self.desc.output(name) @property def output_names(self): - """ - Get all output parameter names - Returns(list): return a list of output parameter names - - """ return self.desc.output_names() @property def idx(self): - """ - Return the array index of current operator. - Returns(int): The array index in block.ops array - Raises: - ValueError: when the operator is not found. - """ for i, op in enumerate(self.block.ops): if op == self: return i @@ -622,66 +682,100 @@ class Operator(object): def has_attr(self, name): """ - operator has the attribute with name or not. + Whether this Operator has the attribute with name or not. + Args: - name(str): the attribute name + name(str): the attribute name. - Returns(bool): True if has this attribute. + Returns: + bool: True if has this attribute. """ return self.desc.has_attr(name) def attr_type(self, name): """ - Get the type of attribute by attribute name - Args: - name(str): the attribute name + Get the type of attribute by attribute's name. - Returns(core.AttrType): the attribute type + Args: + name(str): the attribute name. + Returns: + core.AttrType: the attribute type. """ return self.desc.attr_type(name) def set_attr(self, name, val): + """ + Set the value of attribute by attribute's name. + + Args: + name(str): the attribute name. + val(bool|int|str|float|list): the value of the attribute. + + Raises: + ValueError: If the type of value doesn't match with desc.attr_type(name). + """ self.attrs[name] = val - self.desc.set_attr(name, val) + self._update_desc_attr(name, val) - @property - def attr_names(self): + def _update_desc_attr(self, name, val): """ - Get all attribute names - Returns(list): The list of attribute name + Update the value of desc's attribute by attribute's name. + + Args: + name(str): the attribute name. + val(bool|int|str|float|list): the value of the attribute. + Raises: + ValueError: If the type of value doesn't match with desc.attr_type(name). """ + if isinstance(val, Block): + self.desc.set_block_attr(name, val.desc) + elif isinstance(val, list) and val and all( + isinstance(v, Block) for v in val): + self.desc.set_blocks_attr(name, [v.desc for v in val]) + elif isinstance(val, core.BlockDesc) or \ + isinstance(val, core.ProgramDesc): + self.desc.set_serialized_attr(name, val.serialize_to_string()) + else: + self.desc.set_attr(name, val) + + @property + def attr_names(self): return self.desc.attr_names() def attr(self, name): """ - Get attribute by name + Get the attribute by name. + Args: - name(str): the attribute name + name(str): the attribute name. - Returns(bool|int|str|float|list): The attribute value. The return value + Returns: + bool|int|str|float|list: The attribute value. The return value can be any valid attribute type. - """ return self.desc.attr(name) def block_attr(self, name): """ - Get the block attribute by name - Args: - name(str): the attribute name + Get the block attribute by name. - Returns(int): the block index + Args: + name(str): the attribute name. + Returns: + int: the block index. """ return self.desc.block_attr(name) def all_attrs(self): """ - Get the attribute dict - Returns(dict): The Operator's attribute dict + Get the attribute dict. + + Returns: + dict: The Operator's attribute dict. """ attr_names = self.attr_names attr_map = {} @@ -694,6 +788,35 @@ class Operator(object): class Block(object): + """ + In Fluid, a Program is consistence of multi-Block, and Block stores + VarDesc and OpDesc. In a specific Block, a VarDesc have a unique name. + One block could have some child blocks, and child block's name scopes + should inherit the parent's so that OpDesc in child block can reference + a VarDesc that is stored in the parent block. + Please reference the framework.proto for details. + + Args: + program(Program): The Program that the Block belongs to. + idx(int): The block's id in the Program. + + Notes: + The constructor of Block should not be invoked directly. Please + use `Program.create_block()` to create a block. + + Examples: + .. code-block:: python + + cur_program = Program() + cur_block = cur_program.current_block() + var = cur_block.create_var(name="X", + shape=[-1, 23, 48], + dtype='float32') + cur_block.append_op(type="abs", + inputs={"X": [var]}, + outputs={"Out": [var]}) + """ + def __init__(self, program, idx): self.desc = program.desc.block(idx) self.vars = collections.OrderedDict() # var_name --> var @@ -706,15 +829,17 @@ class Block(object): def to_string(self, throw_on_error, with_details=False): """ - To debug string. + Get debug string. + Args: throw_on_error(bool): raise exception when self is not initialized - when throw_on_error is True + when throw_on_error is True. with_details(bool): more details about variables and parameters - (e.g. trainable, optimize_attr, ...) will be printed when with_details is True - - Returns(str): The debug string. + (e.g. trainable, optimize_attr, ...) will be printed when + with_details is True. Default False. + Returns: + str: The debug string. """ assert isinstance(throw_on_error, bool) and isinstance(with_details, bool) @@ -746,6 +871,15 @@ class Block(object): return self.desc.get_forward_block_idx() def set_forward_block_idx(self, idx): + """ + Set the forward block Idx. + + Args: + idx(int): the block index. + + Returns: + None + """ self.desc.set_forward_block_idx(idx) @property @@ -753,6 +887,19 @@ class Block(object): return self.desc.id def var(self, name): + """ + Get a Variable by name from this block. + + Args: + name(str): the Variable's name. + + Raises: + ValueError: The If input's type is not str, or this block + doesn't have a Variable with the giving name. + + Returns: + Variable: the Variable with the giving name. + """ if not isinstance(name, basestring): raise TypeError( "var require string as parameter, but get %s instead." % @@ -763,6 +910,19 @@ class Block(object): return v def var_recursive(self, name): + """ + Get a Variable by name from this block recursively. + + Args: + name(str): the Variable's name. + + Raises: + ValueError: this block and this parent block doesn't + have a Variable with the giving name. + + Returns: + Variable: the Variable with the giving name. + """ frontier = list() visited = set() @@ -809,6 +969,18 @@ class Block(object): def rename_var(self, name, new_name): """ Rename variable in vars and ops' inputs and outputs + + Args: + name(str): the name that need to be renamed. + new_name(str): the name that need to rename to. + + Raises: + ValueError: If this block doesn't have this the giving name, + or the type of the var with the giving name is not Parameter + or Variable. + + Returns: + Variable: the Variable with the giving name. """ if not self.has_var(name): raise ValueError("var %s is not in current block" % name) @@ -872,12 +1044,27 @@ class Block(object): return param def append_op(self, *args, **kwargs): + """ + Appends a new Operator according to the giving arguments. + + Returns: + Operator: the append Operator. + """ op_desc = self.desc.append_op() op = Operator(block=self, desc=op_desc, *args, **kwargs) self.ops.append(op) return op def insert_op(self, index, *args, **kwargs): + """ + Insert a Operator according to the giving arguments. + + Args: + index(int): the place that the operator to insert. + + Returns: + Operator: the insert Operator. + """ self.sync_with_cpp() op_desc = self.desc.insert_op(index) op = Operator(block=self, desc=op_desc, *args, **kwargs) @@ -885,11 +1072,30 @@ class Block(object): return op def remove_op(self, index): + """ + Remove the specific position operator. + + Args: + index(int): the position that the operator to insert. + + Returns: + None + """ self.sync_with_cpp() self.desc.remove_op(index, index + 1) del self.ops[index] def slice_ops(self, start, end): + """ + Return the Operator between start and end. + + Args: + start(int): the start position. + end(int): the end position. + + Returns: + list: the Operators between start and end. + """ return self.ops[start:end] def prepend_op(self, *args, **kwargs): @@ -900,9 +1106,8 @@ class Block(object): def sync_with_cpp(self): """ - Sync from the desc on the c++ end. - - This method is used to synchronize the c++ desc instance generated by backward. + Sync from the desc on the c++ end. This method is used to synchronize + the c++ desc instance generated by backward. """ # sync variables from cpp for var in self.desc.all_vars(): @@ -967,9 +1172,14 @@ class Block(object): def copy_param_info_from(self, other): """ - Copy the information of parameters from the other block + Copy the information of parameters from the other block. + Args: - other(Block): the other block + other(Block): the other block. + + Raises: + ValueError: If type of input is not Block, or the `other` and this + block is not in the same topology. Returns: None @@ -1001,11 +1211,12 @@ class Block(object): def clone_variable(self, var): """ Clone a variable into current block. + Args: var: the variable to be cloned. Returns: - The new variable cloned from 'var' in current block. + Variable: the new variable cloned from 'var' in current block. """ assert isinstance(var, Variable) ret_var = None @@ -1013,6 +1224,9 @@ class Block(object): if var.type == core.VarDesc.VarType.STEP_SCOPES: ret_var = self.create_var( name=var.name, persistable=var.persistable, type=var.type) + elif var.type == core.VarDesc.VarType.RAW: + ret_var = self.create_var( + name=var.name, persistable=var.persistable, type=var.type) elif var.type == core.VarDesc.VarType.SELECTED_ROWS: ret_var = self.create_var( name=var.name, @@ -1034,6 +1248,32 @@ class Block(object): class Program(object): + """ + Python Program. Beneath it is a ProgramDesc, which is used for + create c++ Program. A program is a self-contained programing + language like container. It has at least one Block, when the + control flow op like conditional_block, while_op is included, + it will contains nested block. + Please reference the framework.proto for details. + + Notes: we have default_startup_program and default_main_program + by default, a pair of them will shared the parameters. + The default_startup_program only run once to initialize parameters, + default_main_program run in every mini batch and adjust the weights. + + Returns: + A empty program. + + Examples: + >>> main_program = fluid.Program() + >>> startup_program = fluid.Program() + >>> with fluid.program_guard(main_program=main_program, startup_program=startup_program): + >>> fluid.layers.data(name="x", shape=[-1, 784], dtype='float32') + >>> fluid.layers.data(name="y", shape=[-1, 1], dtype='int32') + >>> fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu") + + """ + def __init__(self): self.desc = core.ProgramDesc() self.blocks = [Block(self, 0)] @@ -1044,6 +1284,19 @@ class Program(object): @property def op_role(self): + """ + The operator role. In a enum {Forward, Backward, Optimize}. + + Notes: this is a low level API. It is used only for ParallelExecutor to + duplicate or schedule operator to devices. + + For example, the forward operator should be executed on every device. + The backward operator should be executed on every device and the + parameter gradient of backward (use :code:`op_role_var` to get this + variable) operator should be merged to one device. The optimization + operators should be executed on only one device and broadcast the + optimization result, i.e., the new parameter, to every other device. + """ return self._current_role @op_role.setter @@ -1052,6 +1305,13 @@ class Program(object): @property def op_role_var(self): + """ + The auxiliary variables for :code:`op_role` property. + + See Also: :code:`Program.op_role`'s documentation for details. + + Notes: This is a very low-level API. Users should not use it directly. + """ return self._op_role_var @op_role_var.setter @@ -1060,6 +1320,21 @@ class Program(object): @contextlib.contextmanager def optimized_guard(self, var): + """ + A with guard to set :code:`Optimization` :code:`OpRole` and + :code:`OpRoleVar` automatically. + + Notes: This is a very low level API. Users should not use it directly. + + Args: + var(Variable|str): The variable (name) to be optimized. + + Examples: + + >>> p, g = backward(...) + >>> with program.optimized_guard(p): + >>> p = p - 0.001 * g + """ OpRole = core.op_proto_and_checker_maker.OpRole self._current_role = OpRole.Optimize self._op_role_var = [var.name if isinstance(var, Variable) else var] @@ -1068,18 +1343,35 @@ class Program(object): self._current_role = OpRole.Forward def __str__(self): + """ + Get the protobuf debug string of this Program. + + Returns: + (str): The protobuf debug string. + + Raises: + ValueError: If any of required fields is not set. + """ return self.to_string(True) def to_string(self, throw_on_error, with_details=False): """ To debug string. + Args: - throw_on_error(bool): raise exception when self is not initialized - when throw_on_error is True - with_details(bool): more details about variables and parameters - (e.g. trainable, optimize_attr, ...) will be printed when with_details is True + throw_on_error(bool): raise Value error when any of required fields + is not set. - Returns(str): The debug string. + with_details(bool): True if more details about variables and + parameters, e.g., :code:`trainable`, :code:`optimize_attr`, need + to print. + + Returns + (str): The debug string. + + Raises: + ValueError: If any of required fields is not set and throw_on_error is + True. """ assert isinstance(throw_on_error, bool) and isinstance(with_details, @@ -1095,22 +1387,93 @@ class Program(object): return res_str def get_desc(self): + """ + Get the C++ side of `ProgramDesc` object pointer. The C++ object is + exposed by :code:`pybind`. + + Notes: This is a very low level API. Users should not use this API + directly. + """ return self.desc def clone(self, for_test=False): - """Clone the Program object + """ + Create a new, duplicated program. + + + Some operators, e.g., :code:`batch_norm`, behave differently between + training and testing. They have an attribute, :code:`is_test`, to + control this behaviour. This method will change the :code:`is_test` + attribute of them to :code:`True` when :code:`for_test=True`. - Set for_test to False when we want to clone the program for training. - Set for_test to True when we want to clone the program for testing. + * Set for_test to False when we want to clone the program for training. + * Set for_test to True when we want to clone the program for testing. + + Notes: This API DOES NOT prune any operator. Use + :code:`clone(for_test=True)` before backward and optimization please. e.g. + + >>> test_program = fluid.default_main_program().clone(for_test=True) + >>> optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9) + >>> optimizer.minimize() Args: - for_test(bool): Some operators, such as batch_norm and drop_out ops, - behave differently in training and testing. If for_test is True, - the is_test attributes in these operators will be set to True for - testing purposes, otherwise, they remain unchanged. + for_test(bool): True if change the :code:`is_test` attribute of + operators to :code:`True`. - Returns(Program): - The cloned Program object. + Returns: + Program: The new, duplicated Program object. + + Examples: + + 1. To clone a test program, the sample code is: + + >>> import paddle.fluid as fluid + >>> train_program = fluid.Program() + >>> startup_program = fluid.Program() + >>> with fluid.program_guard(train_program, startup_program): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> hidden = fluid.layers.fc(input=img, size=200, act='relu') + >>> hidden = fluid.layers.dropout(hidden, dropout_prob=0.5) + >>> loss = fluid.layers.cross_entropy( + >>> input=fluid.layers.fc(hidden, size=10, act='softmax'), + >>> label=fluid.layers.data(name='label', shape=[1], dtype='int64')) + >>> + >>> test_program = train_program.clone(for_test=True) + >>> + >>> sgd = fluid.optimizer.SGD(learning_rate=1e-3) + >>> with fluid.program_guard(train_program, startup_program): + >>> sgd.minimize(loss) + + 2. The :code:`clone` method can be avoid if you create program for + training and program for testing individually. + + >>> import paddle.fluid as fluid + >>> + >>> def network(is_test): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> hidden = fluid.layers.fc(input=img, size=200, act='relu') + >>> hidden = fluid.layers.dropout(hidden, dropout_prob=0.5, is_test=is_test) + >>> loss = fluid.layers.cross_entropy( + >>> input=fluid.layers.fc(hidden, size=10, act='softmax'), + >>> label=fluid.layers.data(name='label', shape=[1], dtype='int64')) + >>> return loss + >>> + >>> train_program = fluid.Program() + >>> startup_program = fluid.Program() + >>> test_program = fluid.Program() + >>> + >>> with fluid.program_guard(train_program, startup_program): + >>> with fluid.unique_name.guard(): + >>> loss = network(is_test=False) + >>> sgd = fluid.optimizer.SGD(learning_rate=1e-3) + >>> sgd.minimize(loss) + >>> + >>> # the test startup program is not used. + >>> with fluid.program_guard(test_program, fluid.Program()): + >>> with fluid.unique_name.guard(): + >>> loss = network(is_test=True) + + The two code snippets above will generate same programs. """ if for_test: p = self.inference_optimize() @@ -1125,6 +1488,21 @@ class Program(object): return p def prune(self, targets): + """ + Prune operators and variables which are not needed to generate + :code:`targets`. + + Notes: This is a very low level API. Users should not use this API + directly. This API is in flux and not stable. + + Args: + targets(list|Variable|Operator): A list of variables or operators + need to be pruned + + Returns: + Program: A new, pruned program. + + """ if not isinstance(targets, list): targets = [targets] targets_idx = [] @@ -1159,6 +1537,17 @@ class Program(object): return res def inference_optimize(self): + """ + This method will create a new program and change the :code:`is_test` + attribute of operators to :code:`True`. All the :code:`Parameter` + information will be lost. + + Notes: This API is a very low level API. Use + :code:`Program.clone(for_test=True)` instead. + + Returns: + Program: The new program. + """ # this is an alternative implement before # core.inference_optimize being fixed. res = Program() @@ -1175,6 +1564,18 @@ class Program(object): @staticmethod def parse_from_string(binary_str): + """ + Deserialize a program desc from protobuf binary string. + + Notes: All information about parameters will be lost after serialization + and deserialization. + + Args: + binary_str(str): The binary prootbuf string. + + Returns: + Program: A deserialized program desc. + """ p = Program() p.desc = core.ProgramDesc(binary_str) p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())] @@ -1183,10 +1584,19 @@ class Program(object): @property def random_seed(self): + """ + The default random seed for random operators in Program. Zero means get + the random seed from random device. + + Notes: It must be set before the operators have been added. + """ return self._seed @property def num_blocks(self): + """ + The number of blocks in this program. + """ return self.desc.num_blocks() @random_seed.setter @@ -1199,15 +1609,40 @@ class Program(object): return str(self) def global_block(self): + """ + Get the first block of this program. + """ return self.blocks[0] def block(self, index): + """ + Get the :code:`index` block of this program + Args: + index(int): The index of block to get + + Returns: + Block: The :code:`index` block + """ return self.blocks[index] def current_block(self): + """ + Get the current block. The :code:`current` block is the block to append + operators. + """ return self.blocks[self.current_block_idx] def create_block(self, parent_idx=None): + """ + Create a new block with the :code:`parent_idx` and change the current block + to new block. + + Args: + parent_idx(int): The parent block index. + + Returns: + Block: The new block. + """ new_block_idx = len(self.blocks) parent = self.current_block() if parent_idx is None else self.block( parent_idx) @@ -1217,9 +1652,24 @@ class Program(object): return self.current_block() def rollback(self): + """ + Exit a code block, i.e., roll back to the parent block. + Returns: + None + """ self.current_block_idx = self.current_block().parent_idx def sync_with_cpp(self): + """ + Synchronize Python instance to its binding C++ object instance. + If the program is modified in C++ space, this method should be invoked. + + Notes: This is a very low level API. Users should not invoke it + directly. + + Returns: + None + """ for block_idx in range(len(self.blocks), self.desc.num_blocks()): self.blocks.append(Block(self, block_idx)) for block in self.blocks: @@ -1228,6 +1678,10 @@ class Program(object): def copy_param_info_from(self, other): """ Copy the information of parameters from other program. + + Notes: This is a very low level API. Users should not invoke it + directly. + Args: other(Program): Other program @@ -1246,6 +1700,10 @@ class Program(object): def copy_data_info_from(self, other): """ Copy the information of data variables from other program. + + Notes: This is a very low level API. Users should not invoke it + directly. + Args: other(Program): Other program @@ -1264,12 +1722,41 @@ class Program(object): self.global_block().var(var.name).is_data = True def list_vars(self): + """ + Get all variables from this Program. A iterable object is returned. + + Returns: + iterable: The generator will yield every variable in this program. + """ for each_block in self.blocks: for each_var in each_block.vars.itervalues(): yield each_var class Parameter(Variable): + """ + Parameter is derived from Variable. A parameter is a persistable + Variable, and will be updated by optimizers after each iteration. + The training of a neural network is essentially the updating of + its parameters. + + Relative to a general Variable, a Parameter has several its own + member variables: + + Args: + trainable(bool): True if the parameter need to be updated after + iterations. + optimize_attr(map): Parameter attributes related with optimizing. + Currently, it only contains 'learning_rate'. + Default: {'learning_rate': 1.0} + regularizer(WeightDecayRegularizer): The Regularizer which will + be applied on the parameter. Default: None + gradient_clip_attr(BaseGradientClipAttr): The gradint clip strategy + which will be applied on the parameter. Default: None + do_model_average(bool): True if the model average strategy will + be applied on this parameter. + """ + def __init__(self, block, shape, dtype, **kwargs): if shape is None or dtype is None: raise ValueError("Parameter must set shape and dtype") @@ -1299,6 +1786,7 @@ class Parameter(Variable): def to_string(self, throw_on_error, with_details=False): """ To debug string. + Args: throw_on_error(bool): raise exception when self is not initialized when throw_on_error is True @@ -1331,8 +1819,15 @@ _startup_program_ = Program() def default_startup_program(): """ - Get default startup program. In startup program, Paddle will initialize - parameters, initialize nccl handle, etc. + Get default/global startup program. + + The layer function in :code:`fluid.layers` will create parameters, readers, + NCCL handles as global variables. The :code:`startup_program` will + initialize them by the operators in startup program. The layer function will + append these initialization operators into startup program. + + This method will return the :code:`default` or the :code:`current` startup + program. Users can use :code:`fluid.program_guard` to switch program. Returns: Program: startup program @@ -1342,7 +1837,15 @@ def default_startup_program(): def default_main_program(): """ - Get default main program. The main program is used for training or testing. + Get default/global main program. The main program is used for training or + testing. + + All layer function in :code:`fluid.layers` will append operators and + variables to the :code:`default_main_program`. + + The :code:`default_main_program` is the default program in a lot of APIs. + For example, the :code:`Executor.run()` will execute the + :code:`default_main_program` when the program is not specified. Returns: Program: main program @@ -1384,20 +1887,34 @@ def switch_startup_program(program): @contextlib.contextmanager def program_guard(main_program, startup_program=None): """ - Switch program with `with` statement + Change the global main program and startup program with `with` statement. + Layer functions in the Python `with` block will append operators and + variables to the new main programs. + + Examples: + + >>> import paddle.fluid as fluid + >>> main_program = fluid.Program() + >>> startup_program = fluid.Program() + >>> with fluid.program_guard(main_program, startup_program): + >>> data = fluid.layers.data(...) + >>> hidden = fluid.layers.fc(...) + + Notes: The temporary :code:`Program` can be used if the user does not need + to construct either of startup program or main program. Examples: - >>> with program_guard(Program()): - >>> data = fluid.layers.data(...) - >>> hidden = fluid.layers.fc(...) + + >>> import paddle.fluid as fluid + >>> main_program = fluid.Program() + >>> # does not care about startup program. Just pass a temporary value. + >>> with fluid.program_guard(main_program, fluid.Program()): + >>> data = ... Args: - main_program(Program): New main program inside `with` statement + main_program(Program): New main program inside `with` statement. startup_program(Program): New startup program inside `with` statement. None means do not change startup program. - - Returns: - None """ if not isinstance(main_program, Program): raise TypeError("main_program should be Program") @@ -1414,11 +1931,12 @@ def program_guard(main_program, startup_program=None): def get_var(name, program=None): """ - Get a variable by name from the global block of a program + Get a variable by name from the global block of a program. + Args: name(str): name of the variable program(Program|None): program object. - If None, default_global_program() will be used. + If None, default_global_program() will be used. Returns: Variable diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index 6baac00905..a81e39695b 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -27,13 +27,30 @@ __all__ = ['Inferencer', ] class Inferencer(object): + """ + Inferencer High Level API. + + Args: + infer_func (Python func): Infer function that will return predict Variable + param_path (str): The path where the inference model is saved by fluid.io.save_params + place (Place): place to do the inference + parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU. + + Examples: + .. code-block:: python + + def inference_program(): + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + return y_predict + + place = fluid.CPUPlace() + inferencer = fluid.Inferencer( + infer_func=inference_program, param_path="/tmp/model", place=place) + + """ + def __init__(self, infer_func, param_path, place=None, parallel=False): - """ - :param infer_func: a function that will return predict Variable - :param param_path: the path where the inference model is saved by fluid.io.save_params - :param place: place to do the inference - :param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU. - """ self.param_path = param_path self.scope = core.Scope() self.parallel = parallel @@ -60,9 +77,20 @@ class Inferencer(object): def infer(self, inputs, return_numpy=True): """ - :param inputs: a map of {"input_name": input_var} that will be feed into the inference program - to get the predict value - :return: the predict value of the inference model + Do Inference for Inputs + + Args: + inputs (map): a map of {"input_name": input_var} that will be feed into the inference program + return_numpy (bool): transform return value into numpy or not + + Returns: + Tensor or Numpy: the predict value of the inference model for the inputs + + Examples: + .. code-block:: python + + tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") + results = inferencer.infer({'x': tensor_x}) """ if not isinstance(inputs, dict): raise ValueError( diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 4e132ed261..373e9c060d 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -15,28 +15,43 @@ import framework import numpy as np import contextlib +from framework import convert_np_dtype_to_dtype_ +from core import VarDesc __all__ = [ - 'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu', - 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', - 'NormalInitializer', 'XavierInitializer' + 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'MSRA', + 'force_init_on_cpu', 'init_on_cpu', 'ConstantInitializer', + 'UniformInitializer', 'NormalInitializer', 'XavierInitializer', + 'BilinearInitializer', 'MSRAInitializer' ] _force_init_on_cpu_ = False def force_init_on_cpu(): + """ + The flag of whether force to init variables on CPU. + + Examples: + .. code-block:: python + + if force_init_on_cpu(): + pass + + """ return _force_init_on_cpu_ @contextlib.contextmanager def init_on_cpu(): """ - Switch program with `with` statement + Force the variable to be inited on CPU. Examples: - >>> with init_on_cpu(): - >>> step = layers.create_global_var() + .. code-block:: python + + with init_on_cpu(): + step = layers.create_global_var() """ global _force_init_on_cpu_ @@ -102,14 +117,18 @@ class Initializer(object): class ConstantInitializer(Initializer): """Implements the constant initializer + + Args: + value (float): constant value to initialize the variable + + Examples: + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Constant(value=2.0)) """ def __init__(self, value=0.0, force_cpu=False): - """Constructor for ConstantInitializer - - Args: - value: constant value to initialize the variable - """ assert value is not None super(ConstantInitializer, self).__init__() self._value = value @@ -144,16 +163,20 @@ class ConstantInitializer(Initializer): class UniformInitializer(Initializer): """Implements the random uniform distribution initializer + + Args: + low (float): lower boundary of the uniform distribution + high (float): upper boundary of the uniform distribution + seed (int): random seed + + Examples: + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5)) """ def __init__(self, low=-1.0, high=1.0, seed=0): - """Constructor for UniformInitializer - - Args: - low: lower boundary of the uniform distribution - high: upper boundary of the uniform distribution - seed: random seed - """ assert low is not None assert high is not None assert high >= low @@ -194,17 +217,21 @@ class UniformInitializer(Initializer): class NormalInitializer(Initializer): - """Implements the random Normal(Gaussian) distribution initializer + """Implements the Random Normal(Gaussian) distribution initializer + + Args: + loc (float): mean of the normal distribution + scale (float): standard deviation of the normal distribution + seed (int): random seed + + Examples: + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10, + param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0)) """ def __init__(self, loc=0.0, scale=1.0, seed=0): - """Constructor for NormalInitializer - - Args: - loc: mean of the normal distribution - scale: standard deviation of the normal distribution - seed: random seed - """ assert loc is not None assert scale is not None assert seed is not None @@ -244,39 +271,49 @@ class NormalInitializer(Initializer): class XavierInitializer(Initializer): - """Implements the Xavier initializer - + """ This class implements the Xavier weight initializer from the paper - Understanding the difficulty of training deep feedforward neural - networks[1] by Xavier Glorot and Yoshua Bengio. + `Understanding the difficulty of training deep feedforward neural + networks `_ + by Xavier Glorot and Yoshua Bengio. This initializer is designed to keep the scale of the gradients approximately same in all the layers. In case of Uniform distribution, - the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)). + the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} + In case of Normal distribution, the mean is 0 and the standard deviation - is sqrt(2/ (fan_in + fan_out)). + is + + .. math:: + + \sqrt{\\frac{2.0}{fan\_in + fan\_out}} + + + Args: + uniform (bool): whether to use uniform or normal distribution + fan_in (float): fan_in for Xavier initialization. If None, it is + inferred from the variable. + fan_out (float): fan_out for Xavier initialization. If None, it is + inferred from the variable. + seed (int): random seed + + Note: + It is recommended to set fan_in and fan_out to None for most cases. + + Examples: + .. code-block:: python + + fc = fluid.layers.fc( + input=queries, size=10, + param_attr=fluid.initializer.Xavier(uniform=False)) - References: - [1] Understanding the difficulty of training deep feedforward neural - networks. International conference on artificial intelligence and - statistics. - (http://proceedings.mlr.press/v9/glorot10a.html) """ def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): - """Constructor for XavierInitializer - - Args: - uniform: whether to use uniform or normal distribution - fan_in: fan_in for Xavier initialization. If None, it is - inferred from the variable. - fan_out: fan_out for Xavier initialization. If None, it is - inferred from the variable. - seed: random seed - - Note: It is recommended to set fan_in and fan_out to None for - most cases. - """ assert uniform is not None assert seed is not None super(XavierInitializer, self).__init__() @@ -340,30 +377,42 @@ class MSRAInitializer(Initializer): """Implements the MSRA initializer a.k.a. Kaiming Initializer This class implements the weight initialization from the paper - Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren - and Jian Sun. This is a robust initialization method that particularly - considers the rectifier nonlinearities. In case of Uniform distribution, - the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal - distribution, the mean is 0 and the standard deviation - is sqrt(2/ fan_in). - - References: - [1] Delving Deep into Rectifiers: Surpassing Human-Level Performance - on ImageNet Classification - (https://arxiv.org/abs/1502.01852) + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification `_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. In case of Uniform distribution, the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. math:: + + \sqrt{\\frac{2.0}{fan\_in}} + + Args: + uniform (bool): whether to use uniform or normal distribution + fan_in (float): fan_in for MSRAInitializer. If None, it is\ + inferred from the variable. + seed (int): random seed + + Note: + It is recommended to set fan_in to None for most cases. + + Examples: + .. code-block:: python + + fc = fluid.layers.fc( + input=queries, size=10, + param_attr=fluid.initializer.MSRA(uniform=False)) """ def __init__(self, uniform=True, fan_in=None, seed=0): """Constructor for MSRAInitializer - - Args: - uniform: whether to use uniform or normal distribution - fan_in: fan_in for MSRAInitializer. If None, it is - inferred from the variable. - seed: random seed - - Note: It is recommended to set fan_in to None for most cases. """ assert uniform is not None assert seed is not None @@ -422,6 +471,104 @@ class MSRAInitializer(Initializer): return op +class BilinearInitializer(Initializer): + """ + This initializer can be used in transposed convolution operator to + act as upsampling. Users can upsample a feature map with shape of + (B, C, H, W) by any integer factor. The usage is: + + Examples: + + .. code-block:: python + + factor = 2 + w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.), + initializer=Bilinear()) + conv_up = fluid.layers.conv2d_transpose( + input, + num_filters=C, + output_size=None, + filter_size=2 * factor - factor % 2, + padding=ceil((factor - 1) / 2.), + stride=factor, + groups=C, + param_attr=w_attr, + bias_attr=False) + + Where, `num_filters=C` and `groups=C` means this is channel-wise transposed + convolution. The filter shape will be (C, 1, K, K) where K is `filer_size`, + This initializer will set a (K, K) interpolation kernel for every channel + of the filter identically. The resulting shape of the output feature map + will be (B, C, factor * H, factor * W). Note that the learning rate and the + weight decay are set to 0 in order to keep coefficient values of bilinear + interpolation unchanged during training. + + """ + + def __init__(self): + """Constructor for BilinearInitializer. + """ + super(BilinearInitializer, self).__init__() + + def __call__(self, var, block): + """Add biliear initialization ops for a variable + + Args: + var (Variable): Variable that needs to be initialized. + block (Block): The block in which initialization ops should + be added. + + Returns: + Operator: the initialization op + + Raises: + ValueError: If type of `var` and `block` is not right. + If the shape of `var` size is not 4 and + var.shape[2] != var.shape[3]. + """ + if not isinstance(var, framework.Variable): + raise ValueError("var must be framework.Variable.") + + if not isinstance(block, framework.Block): + raise ValueError("block must be framework.Block.") + + shape = var.shape + if len(shape) != 4: + raise ValueError("the length of shape must be 4.") + if shape[2] != shape[3]: + raise ValueError("shape[2] must be equal to shape[3].") + + weight = np.zeros(np.prod(var.shape), dtype='float32') + size = shape[3] + # factor + f = np.ceil(size / 2.) + # center + c = (2 * f - 1 - f % 2) / (2. * f) + for i in range(np.prod(shape)): + x = i % size + y = (i / size) % size + weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) + weight = np.reshape(weight, shape) + + if var.dtype == VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in weight.flat] + else: + raise ValueError("Unsupported dtype %s", input.dtype) + if np.prod(shape) > 1024 * 1024: + raise ValueError("The size of input is too big. ") + op = block.append_op( + type='assign_value', + outputs={'Out': [var]}, + attrs={ + 'dtype': var.dtype, + 'shape': list(shape), + value_name: values + }) + var.op = op + return op + + # We short the class name, since users will use the initializer with the package # name. The sample code: # @@ -436,3 +583,4 @@ Uniform = UniformInitializer Normal = NormalInitializer Xavier = XavierInitializer MSRA = MSRAInitializer +Bilinear = BilinearInitializer diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 6323c9899e..5c8f4f6507 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -13,11 +13,12 @@ # limitations under the License. import os +import errno import time import shutil from paddle.fluid.evaluator import Evaluator -from paddle.fluid.framework import Program, Parameter, default_main_program, Variable +from paddle.fluid.framework import Program, Parameter, default_main_program, default_startup_program, Variable from . import core __all__ = [ @@ -25,25 +26,48 @@ __all__ = [ 'load_persistables', 'save_inference_model', 'load_inference_model', 'get_inference_program', 'save_checkpoint', 'load_checkpoint', 'clean_checkpoint', 'load_persist_vars_without_grad', - 'save_persist_vars_without_grad', 'get_latest_checkpoint_serial' + 'load_lookup_table_vars', 'save_persist_vars_without_grad', + 'get_latest_checkpoint_serial' ] def is_parameter(var): - """Check whether the variable is a Parameter. - - This function checks whether the input variable is a Parameter. + """ + Check whether the given variable is an instance of Parameter. Args: - var : The input variable. + var(Variable): The variable to be checked. Returns: - boolean result whether the variable is a Parameter. + bool: True if the given `var` is an instance of Parameter, + False if not. + + Examples: + .. code-block:: python + + param = fluid.default_main_program().global_block().var('fc.w') + res = fluid.io.is_parameter(param) """ return isinstance(var, Parameter) def is_persistable(var): + """ + Check whether the given variable is persistable. + + Args: + var(Variable): The variable to be checked. + + Returns: + bool: True if the given `var` is persistable + False if not. + + Examples: + .. code-block:: python + + param = fluid.default_main_program().global_block().var('fc.w') + res = fluid.io.is_persistable(param) + """ if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ var.desc.type() == core.VarDesc.VarType.FETCH_LIST: return False @@ -68,20 +92,69 @@ def save_vars(executor, predicate=None, filename=None): """ - Save variables to directory by executor. + Save variables to the given directory by executor. + + There are two ways to specify variables to be saved: The first way, list + variables in a list and assign it to the `vars`. The second way, assign the + `main_program` with an existing program, then all variables in the program + will be saved. The first way has a higher priority. In other words, if `vars` + are assigned, the `main_program` and the `predicate` will be ignored. + + The `dirname` are used to specify the folder where to save variables. + If you prefer to save variables in separate files in the folder `dirname`, + set `filename` None; if you prefer to save all variables in a single file, + use `filename` to specify it. + + Args: + executor(Executor): The executor to run for saving variables. + dirname(str): The directory path. + main_program(Program|None): The program whose variables will be saved. + If it is None, the default main program will + be used automatically. + Default: None + vars(list[Variable]|None): The list that contains all variables to save. + It has a higher priority than the `main_program`. + Default: None + predicate(function|None): If it is not None, only variables in the + `main_program` that makes predicate(variable)==True + will be saved. It only works when we are using the + `main_program` to specify variables (In other words + `vars` is None). + Default: None + filename(str|None): The file which to save all variables. If you prefer to save + variables separately, set it to None. + Default: None + + Returns: + None + + Raises: + TypeError: If `main_program` is not an instance of Program nor None. + + Examples: + .. code-block:: python - :param executor: executor that save variable - :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default default_main_program. - :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the corresponding input variable will be saved. - :param vars: variables need to be saved. If vars is specified, program & predicate - will be ignored - :param filename: The name of a single file that all vars are saved to. - If it is None, save variables to separate files. + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" - :return: None + # The first usage: using `main_program` to specify variables + def name_has_fc(var): + res = "fc" in var.name + return res + + prog = fluid.default_main_program() + fluid.io.save_vars(executor=exe, dirname=path, main_program=prog, + vars=None) + # All variables in `main_program` whose name includes "fc" will be saved. + # And variables are going to be saved separately. + + + # The second usage: using `vars` to specify variables + var_list = [var_a, var_b, var_c] + fluid.io.save_vars(executor=exe, dirname=path, vars=var_list, + filename="vars_file") + # var_a, var_b and var_c will be saved. And they are going to be + # saved in the same file named 'var_file' in the path "./my_paddle_model". """ if vars is None: if main_program is None: @@ -129,7 +202,42 @@ def save_vars(executor, def save_params(executor, dirname, main_program=None, filename=None): """ - Save all parameters to directory with executor. + This function filters out all parameters from the give `main_program` + and then save them to the folder `dirname` or the file `filename`. + + Use the `dirname` to specify the saving folder. If you would like to + save parameters in separate files, set `filename` None; if you would + like to save all parameters in a single file, use `filename` to specify + the file name. + + NOTICE: Some variables are not Parameter while they are necessary for + training. So you can NOT save and continue your training just by + `save_params()` and `load_params()`. Please use `save_persistables()` + and `load_persistables()` instead. + + Args: + executor(Executor): The executor to run for saving parameters. + dirname(str): The saving directory path. + main_program(Program|None): The program whose parameters will be + saved. If it is None, the default + main program will be used automatically. + Default: None + filename(str|None): The file to save all parameters. If you prefer + to save parameters in differnet files, set it + to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_params(executor=exe, dirname=param_path, + main_program=None) """ save_vars( executor, @@ -142,7 +250,37 @@ def save_params(executor, dirname, main_program=None, filename=None): def save_persistables(executor, dirname, main_program=None, filename=None): """ - Save all persistables to directory with executor. + This function filters out all variables with `persistable==True` from the + give `main_program` and then saves these variables to the folder `dirname` + or file `filename`. + + The `dirname` is used to specify the folder where persistable variables + are going to be saved. If you would like to save variables in separate + files, set `filename` None; if you would like to save all variables in a + single file, use `filename` to specify the file name. + + Args: + executor(Executor): The executor to run for saving persistable variables. + dirname(str): The directory path. + main_program(Program|None): The program whose persistbale variables will + be saved. If it is None, the default main + program will be used automatically. + Default: None + filename(str|None): The file to saved all variables. If you prefer to + save variables in differnet files, set it to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_persistables(executor=exe, dirname=param_path, + main_program=None) """ save_vars( executor, @@ -160,20 +298,69 @@ def load_vars(executor, predicate=None, filename=None): """ - Load variables from directory by executor. + Load variables from the given directory by executor. + + There are two ways to specify variables to be loaded: The first way, list + variables in a list and assign it to the `vars`. The second way, assign the + `main_program` with an existing program, then all variables in the program + will be loaded. The first way has a higher priority. In other words if `vars` + are assigned, the `main_program` and the `predicate` will be ignored. - :param executor: executor that load variable - :param dirname: directory path - :param main_program: program. If vars is None, then filter all variables in this - program which fit `predicate`. Default default_main_program(). - :param predicate: The Predicate describes a callable that returns a variable - as a bool. If it returns true, the corresponding input variable will be loaded. - :param vars: variables need to be loaded. If vars is specified, program & - predicate will be ignored - :param filename: The name of the single file that all vars are loaded from. - If it is None, load variables from separate files. + The `dirname` are used to specify the folder where to load variables. + If variables were saved in separate files in the folder `dirname`, + set `filename` None; if all variables were saved in a single file, + use `filename` to specify it. - :return: None + Args: + executor(Executor): The executor to run for loading variables. + dirname(str): The directory path. + main_program(Program|None): The program whose variables will be loaded. + If it is None, the default main program will + be used automatically. + Default: None + vars(list[Variable]|None): The list that contains all variables to load. + It has a higher priority than the `main_program`. + Default: None + predicate(function|None): If it is not None, only variables in the + `main_program` that makes predicate(variable)==True + will be loaded. It only works when we are using the + `main_program` to specify variables (In other words + `vars` is None). + Default: None + filename(str|None): The file which saved all required variables. If variables + were saved in differnet files, set it to None. + Default: None + + Returns: + None + + Raises: + TypeError: If `main_program` is not an instance of Program nor None. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + + # The first usage: using `main_program` to specify variables + def name_has_fc(var): + res = "fc" in var.name + return res + + prog = fluid.default_main_program() + fluid.io.load_vars(executor=exe, dirname=path, main_program=prog, + vars=None) + # All variables in `main_program` whose name includes "fc" will be loaded. + # And all the variables are supposed to have been saved in differnet files. + + + # The second usage: using `vars` to specify variables + var_list = [var_a, var_b, var_c] + fluid.io.load_vars(executor=exe, dirname=path, vars=var_list, + filename="vars_file") + # var_a, var_b and var_c will be loaded. And they are supposed to haven + # been saved in the same file named 'var_file' in the path "./my_paddle_model". """ if vars is None: if main_program is None: @@ -221,7 +408,42 @@ def load_vars(executor, def load_params(executor, dirname, main_program=None, filename=None): """ - load all parameters from directory by executor. + This function filters out all parameters from the give `main_program` + and then trys to load these parameters from the folder `dirname` or + the file `filename`. + + Use the `dirname` to specify the folder where parameters were saved. If + parameters were saved in separate files in the folder `dirname`, set + `filename` None; if all parameters were saved in a single file, use + `filename` to specify the file name. + + NOTICE: Some variables are not Parameter while they are necessary for + training. So you can NOT save and continue your training just by + `save_params()` and `load_params()`. Please use `save_persistables()` + and `load_persistables()` instead. + + Args: + executor(Executor): The executor to run for loading parameters. + dirname(str): The directory path. + main_program(Program|None): The program whose parameters will be + loaded. If it is None, the default + main program will be used automatically. + Default: None + filename(str|None): The file which saved all parameters. If parameters + were saved in differnet files, set it to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_params(executor=exe, dirname=param_path, + main_program=None) """ load_vars( executor, @@ -233,7 +455,37 @@ def load_params(executor, dirname, main_program=None, filename=None): def load_persistables(executor, dirname, main_program=None, filename=None): """ - load all persistables from directory by executor. + This function filters out all variables with `persistable==True` from the + give `main_program` and then trys to load these variables from the folder + `dirname` or the file `filename`. + + Use the `dirname` to specify the folder where persistable variables were + saved. If variables were saved in separate files, set `filename` None; + if all variables were saved in a single file, use `filename` to specify + the file name. + + Args: + executor(Executor): The executor to run for loading persistable variables. + dirname(str): The directory path. + main_program(Program|None): The program whose persistbale variables will + be loaded. If it is None, the default main + program will be used automatically. + Default: None + filename(str|None): The file which saved all variables. If variables were + saved in differnet files, set it to None. + Default: None + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_persistables(executor=exe, dirname=param_path, + main_program=None) """ load_vars( executor, @@ -306,22 +558,48 @@ def save_inference_model(dirname, model_filename=None, params_filename=None): """ - Build a model especially for inference, - and save it to directory by the executor. + Prune the given `main_program` to build a new program especially for inference, + and then save it and all related parameters to given `dirname` by the `executor`. + + Args: + dirname(str): The directory path to save the inference model. + feeded_var_names(list[str]): Names of variables that need to be feeded data + during inference. + target_vars(list[Variable]): Variables from which we can get inference + results. + executor(Executor): The executor that saves the inference model. + main_program(Program|None): The original program, which will be pruned to + build the inference model. If is setted None, + the default main program will be used. + Default: None. + model_filename(str|None): The name of file to save the inference program + itself. If is setted None, a default filename + `__model__` will be used. + params_filename(str|None): The name of file to save all related parameters. + If it is setted None, parameters will be saved + in separate files . + + Returns: + None - :param dirname: directory path - :param feeded_var_names: Names of variables that need to be feeded data during inference - :param target_vars: Variables from which we can get inference results. - :param executor: executor that save inference model - :param main_program: original program, which will be pruned to build the inference model. - Default default_main_program(). - :param model_filename: The name of file to save inference program. - If not specified, default filename `__model__` will be used. - :param params_filename: The name of file to save parameters. - It is used for the case that all parameters are saved in a single binary file. - If not specified, parameters are considered saved in separate files. + Raises: + ValueError: If `feed_var_names` is not a list of basestring. + ValueError: If `target_vars` is not a list of Variable. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./infer_model" + fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'], + target_vars=[predict_var], executor=exe) + + # In this exsample, the function will prune the default main program + # to make it suitable for infering the `predict_var`. The pruned + # inference program is going to be saved in the "./infer_model/__model__" + # and parameters are going to be saved in separate files under folder + # "./infer_model". - :return: None """ if isinstance(feeded_var_names, basestring): feeded_var_names = [feeded_var_names] @@ -382,18 +660,49 @@ def load_inference_model(dirname, """ Load inference model from a directory - :param dirname: directory path - :param executor: executor that load inference model - :param model_filename: The name of file to load inference program. - If not specified, default filename `__model__` will be used. - :param params_filename: The name of file to load parameters. - It is used for the case that all parameters are saved in a single binary file. - If not specified, parameters are considered saved in separate files. + Args: + dirname(str): The directory path + executor(Executor): The executor to run for loading inference model. + model_filename(str|None): The name of file to load inference program. + If it is None, the default filename + '__model__' will be used. + Default: None + params_filename(str|None): The name of file to load all parameters. + It is only used for the case that all + parameters were saved in a single binary + file. If parameters were saved in separate + files, set it as 'None'. + + Returns: + tuple: The return of this function is a tuple with three elements: + (program, feed_target_names, fetch_targets). The `program` is a + Program, it's the program for inference. The `feed_target_names` is + a list of str, it contains Names of variables that need to feed + data in the inference program. The `fetch_targets` is a list of + Variable. It contains variables from which we can get inference + results. + + Raises: + ValueError: If `dirname` is not a existing directory. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./infer_model" + [inference_program, feed_target_names, fetch_targets] = + fluid.io.load_inference_model(dirname=path, executor=exe) + results = exe.run(inference_program, + feed={feed_target_names[0]: tensor_img}, + fetch_list=fetch_targets) + + # In this exsample, the inference program was saved in the + # "./infer_model/__model__" and parameters were saved in + # separate files in ""./infer_model". + # After getting inference program, feed target names and + # fetch targets, we can use an Executor to run the inference + # program to get the inference result. - :return: [program, feed_target_names, fetch_targets] - program: program especially for inference. - feed_target_names: Names of variables that need to feed data - fetch_targets: Variables from which we can get inference results. """ if not os.path.isdir(dirname): raise ValueError("There is no directory named '%s'", dirname) @@ -424,12 +733,25 @@ def load_inference_model(dirname, def get_parameter_value(para, executor): """ - Get the LoDTensor for the parameter + Get the LoDTensor value of the given parameter. + + Args: + para(Parameter): The parameter to get value from. + executor(Executor): The executor to run for retrieving the value. - :param executor: executor for retrieving the value - :param para: the given parameter + Returns: + numpy.array: The given parameter's values. + + Raises: + AssertionError: If the `para` is not an instance of Parameter. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param = fluid.default_main_program().global_block().var('fc.w') + p = fluid.io.get_parameter_value(param, exe) - :return: the LoDTensor for the parameter """ assert is_parameter(para) @@ -441,14 +763,30 @@ def get_parameter_value(para, executor): def get_parameter_value_by_name(name, executor, program=None): """ - Get the LoDTensor for paramter with the given name + Get the LoDTensor value of a certain parameter by its name. + + Args: + name(str): The parameter's name. + executor(Executor): The executor to run for retrieving the value. + program(Program | None): The program where to find the parameter. + If it's set to be None, the function will + try to find the parameter in the default + main program. - :param executor: executor for retrieving the value - :param name: the name of the parameter - :param program: the program where the variable is found - Default default_main_program(). + Returns: + numpy.array: The parameter's values. + + Raises: + TypeError: If given `name` is not an instance of basestring. + TypeError: If the parameter with the given name doesn't exist. + AssertionError: If there is a varibale named `name` in the + given program but it is not a Parameter. + + Examples: + .. code-block:: python - :return: the LoDTensor for the variable + exe = fluid.Executor(fluid.CPUPlace()) + p = fluid.io.get_parameter_value('fc.w', exe) """ if program is None: program = default_main_program() @@ -459,6 +797,7 @@ def get_parameter_value_by_name(name, executor, program=None): SUCCESS_MARK_FILENAME = "_SUCCESS" CHECKPOINT_PREFIX = "checkpoint" MODEL_DIR = "__model__" +LOOKUP_TABLE_DIR = "__lookup_table__" TRAINER_PREFIX = "trainer" CHECKPOINT_SEPARATOR = "_" @@ -468,48 +807,145 @@ def save_checkpoint(executor, trainer_id, trainer_args=None, main_program=None, - max_num_checkpoints=3): + max_num_checkpoints=3, + lookup_table=None, + ps_endpoint_list=None): """ - Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory, - the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy - to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most, - The interval between two saved checkpoints must greater than save_interval_secs. + This function filters out all checkpoint variables from the give + main_program and then saves these variables to the `checkpoint_dir` + directory. + + In the training precess, we generally save a checkpoint in each + iteration. So there might be a lot of checkpoints in the + `checkpoint_dir`. To avoid them taking too much disk space, the + `max_num_checkpoints` are introduced to limit the total number of + checkpoints. If the number of existing checkpints is greater than + the `max_num_checkpoints`, oldest ones will be scroll deleted. + + A variable is a checkpoint variable and will be saved if it meets + all following conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for save checkpoint. + checkpoint_dir(str): The folder where to save checkpoints. + trainer_id(int): currect trainer id, if id is equal to 0, the trainer + is chief. + trainer_args(dict|None): Current training arguments. Such as 'epoch_id' + and 'step_id'. + Defaut: None + main_program(Program|None): The program whose checkpoint variables will + be saved. If it is None, the default main program will be used. + max_num_checkpoints(int): The max number of total number of existing + checkpoints. + Default: 3 + lookup_table(string|None): the lookup table name, when use distribute + lookup table, we can get lookup table name by DistributeTranspiler. + table_name + ps_endpoint_list(list|None): the parameter server ip:port list. + when use distribute lookup table, we can get ps_endpoint_list by + distribute arguments. - :param executor executor for save the value - :param checkpoint_dir the checkpoint directory - :param trainer_id currect trainer id, if id is equal to 0, the trainer is chief - :param main_program will save all variables in program - :param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints + Returns: + None + + Raises: + ValueError: If `checkpoint_dir` is None. + AssertionError: If `trainer_args` is not a dict. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./checkpoints" + prog = fluid.default_main_program() + trainer_args = {"epoch_id": 200, + "step_id": 20} # just an example + table_name = "share_w" + ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] + + fluid.io.save_checkpoint(executor=exe, + checkpoint_dir=path, + trainer_id=0, + trainer_args=trainer_args, + main_program=prog, + max_num_checkpoints=3, + lookup_table=table_name, + ps_endpoint_list = ps_endpoints) """ if checkpoint_dir is None: raise ValueError("'checkpoint_dir' should not be None") + assert checkpoint_dir if trainer_args: assert isinstance(trainer_args, dict) - if not os.path.isdir(checkpoint_dir): - os.makedirs(checkpoint_dir) + is_chief = trainer_id == 0 + _make_chekcpoint_dirs(checkpoint_dir) serial = get_latest_checkpoint_serial(checkpoint_dir) + 1 cur_dir = _get_serial_dir(checkpoint_dir, serial) save_trainer_args(cur_dir, trainer_id, trainer_args) - if trainer_id == 0: + if is_chief: save_persist_vars_without_grad(executor, cur_dir, main_program) + if is_chief and lookup_table and ps_endpoint_list: + save_pserver_vars_by_notify(executor, cur_dir, lookup_table, + ps_endpoint_list) + _scroll_delete(checkpoint_dir, max_num_checkpoints) def load_checkpoint(executor, checkpoint_dir, serial, main_program): """ - Load checkpoint from a directory by executor, - it will find the most recent saved checkpoint file and load it auto. + This function filters out all checkpoint variables from the give + main_program and then try to load these variables from the + `checkpoint_dir` directory. + + In the training precess, we generally save a checkpoint in each + iteration. So there are more than one checkpoint in the + `checkpoint_dir` (each checkpoint has its own sub folder), use + `serial` to specify which serial of checkpoint you would like to + load. + + A variable is a checkpoint variable and will be loaded if it meets + all following conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for loading checkpoint. + checkpoint_dir(str): The folder where all checkpoints are. + serial(int): The serial of checkpoint you would like to load. + main_program(Program): The program whose checkpoint variables will + be loaded. - :param executor executor for load the value - :param checkpoint_dir the checkpoint directory - :param serial the serial folder in checkpoint directory will be load - :param main_program will load all variables in program + Returns: + None + + Raises: + ValueError: If `checkpoint_dir` is None. + ValueError: If `serial` is None or `serial` is less than 0. + ValueError: If `main_program` is None. + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + path = "./checkpoints" + prog = fluid.default_main_program() + fluid.io.load_checkpoint(executor=exe, checkpoint_dir=path, + serial=9, main_program=prog) + + # In this example, `load_checkpoint` function + # will first filters out all checkpoint variables in the default + # main program, and then try to load these variables form the + # folder "./checkpoints/checkpoint_9/__model__". """ if checkpoint_dir is None: @@ -527,11 +963,12 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program): def clean_checkpoint(checkpoint_dir, delete_dir=False): """ - clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before. + clean the checkpoint dir, when the train exits normally, + the trainer will call clean_checkpoint to delete checkpoint directory saved before. delete_dir only works when the directory is empty, otherwise, OSError is raised. - :param checkpoint_dir - :param delete_dir + : param checkpoint_dir + : param delete_dir """ if checkpoint_dir is None: @@ -547,13 +984,40 @@ def load_persist_vars_without_grad(executor, program, has_model_dir=False): """ - load_persist_vars_without_grad will load variables from a directory by an executor, - the variable named end with "@GRAD" will not be loaded. + This function filters out all checkpoint variables from the give + program and then trys to load these variables from the given directory. + + A variable is a checkpoint variable if it meets all following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for loading variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be loaded. + has_model_dir(bool): if True, the function loads variables + from a sub directory named '__model__'. + Default: False - :param executor executor for load the value - :param dirname the checkpoint directory - :param program will load all variables in program - :param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__ + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.load_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog, has_model_dir=True) + + # In this example, `load_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then trys to load these variables form the + # folder "./my_paddle_model/__model__". """ if has_model_dir: @@ -567,14 +1031,90 @@ def load_persist_vars_without_grad(executor, filename=None) +def load_lookup_table_vars(executor, dirname, program, pserver_id, table_name): + """ + The parameter server will load lookup table's local file in + selectedrows variable. + + Args: + executor(Executor): The executor to run for loading persistable variables + dirname(str): The directory path + main_program(Program): Find the variable named table_name in main_program + pserver_id(int): the serial number in pserver_endpoints list + table_name(str): lookup table name + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + dirname = "./checkpoints/checkpoint_9/__model__" + prog = fluid.default_main_program() + pserver_id = 1 + table_name = "share_w" + fluid.io.load_lookup_table_vars(executor=exe, + dirname=dirname, program=prog, pserver_id=pserver_id, + table_name=table_name) + """ + + for var in program.list_vars(): + if var.name == table_name: + lookup_table_var = var + break + + assert lookup_table_var is not None + + lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) + table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id) + + load_prog = Program() + load_block = load_prog.global_block() + + load_block.append_op( + type='load', + inputs={}, + outputs={'Out': [lookup_table_var]}, + attrs={'file_path': os.path.join(lookup_table_dir, table_file)}) + + executor.run(load_prog) + + def save_persist_vars_without_grad(executor, dirname, program): """ - save_persist_vars_without_grad will save variables to a directory by an executor, - the variable named end with "@GRAD" will not be saved. + This function filters out all checkpoint variables from the give + program and then save these variables to a sub-folder '__model__' of + the given directory. + + A variable is a checkpoint variable if it meets all following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - :param executor executor for load the value - :param dirname the checkpoint directory - :param program will load all variables in program + Args: + executor(Executor): The executor to run for saving variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be saved. + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + fluid.io.save_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog) + + # In this example, `save_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then saves these variables to the folder + # "./my_paddle_model/__model__". """ cur_dir = _get_model_dir(dirname) save_vars( @@ -587,6 +1127,54 @@ def save_persist_vars_without_grad(executor, dirname, program): _write_success(cur_dir) +def save_pserver_vars_by_notify(executor, dirname, lookup_table, + ps_endpoint_list): + """ + This function will send checkpoint notify message from Trainer 0 + to all the pservers. + The checkpoint notify message contains lookup table name, + the absolute path on pserver to save lookup_table. + + Args: + executor(Executor): The executor to run for send checkpoint notify. + dirname(str): The folder where to save checkpoints. + lookup_table(string): the lookup table name, when use distribute + lookup table, we can get lookup table name by DistributeTranspiler. + table_name + ps_endpoint_list(list): the parameter server ip:port list. + when use distribute lookup table, we can get ps_endpoint_list by + distribute arguments. + Return: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + table_name = "share_w" + ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] + + fluid.io.save_pserver_vars_by_notify(executor=exe, + dirname=param_path, lookup_table=table_name, + ps_endpoint_list=ps_endpoints) + """ + cur_dir = _get_lookuptable_dir(dirname) + + checkpoint_notify_program = Program() + checkpoint_notify_block = checkpoint_notify_program.global_block() + + attrs = {} + attrs['epmap'] = ps_endpoint_list + attrs['dir'] = cur_dir + attrs['lookup_table'] = lookup_table + + checkpoint_notify_block.append_op( + type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs) + executor.run(checkpoint_notify_program) + + def save_trainer_args(dirname, trainer_id, trainer_args): assert isinstance(trainer_args, dict) @@ -600,6 +1188,29 @@ def save_trainer_args(dirname, trainer_id, trainer_args): def load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args): + """ + trainer will load some args from it's independent directory, + such as epoch_id and step_id. + + Args: + checkpoint_dir(str): The folder where all checkpoints are. + serial(int): The serial of checkpoint you would like to load. + trainer_id(int): current trainer id. + trainer_args(list): list about load trainer args + Return: + None + + Examples: + .. code-block:: python + + param_path = "./checkpoint/" + serial = 7 + trainer_id = 2 + trainer_args = ["epoch_id", "step_id"] + + fluid.io.load_trainer_args(checkpoint_dir=param_path, serial=serial, + trainer_id=trainer_id, trainer_args=trainer_args) + """ assert isinstance(trainer_args, list) cur_dir = _get_serial_dir(checkpoint_dir, serial) @@ -620,7 +1231,7 @@ def _is_checkpoint_var(var): the checkpoint will not save or load all the variables. var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded. - :param var + : param var(Variable) """ if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ @@ -640,6 +1251,23 @@ def _is_checkpoint_var(var): return var.persistable +def _make_chekcpoint_dirs(dirs): + """ + _make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it. + """ + assert dirs is not None + + if os.path.isfile(dirs): + raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs) + + if not os.path.isdir(dirs): + try: + os.makedirs(dirs) + except OSError as err: + if err.errno != errno.EEXIST: + raise err + + def _get_dir_serial(dirname): _, serial = dirname.split(CHECKPOINT_SEPARATOR) @@ -653,29 +1281,27 @@ def _get_dir_serial(dirname): def _get_serial_dir(dirname, serial): serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) serial_dir = os.path.join(dirname, serial_folder) - - if not os.path.isdir(serial_dir): - os.makedirs(serial_dir) + _make_chekcpoint_dirs(serial_dir) return serial_dir def _get_model_dir(dirname): model_dir = os.path.join(dirname, MODEL_DIR) + _make_chekcpoint_dirs(model_dir) + return model_dir - if not os.path.isdir(model_dir): - os.makedirs(model_dir) - return model_dir +def _get_lookuptable_dir(dirname): + lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) + _make_chekcpoint_dirs(lookuptable_dir) + return lookuptable_dir def _get_trainer_dir(dirname, trainer_id): trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) trainer_dir = os.path.join(dirname, trainer_folder) - - if not os.path.isdir(trainer_dir): - os.makedirs(trainer_dir) - + _make_chekcpoint_dirs(trainer_dir) return trainer_dir @@ -694,14 +1320,18 @@ def _scroll_delete(dirname, max_num_checkpoints=3): serials = serials[max_num_checkpoints:] for serial in serials: cur_dir = _get_serial_dir(dirname, serial) - shutil.rmtree(cur_dir) + try: + shutil.rmtree(cur_dir) + except OSError as err: + if err.errno != errno.ENOENT: + raise err def _write_success(dirname): """ write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. - :param dirname + : param dirname """ success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) with open(success_file, 'a') as f: @@ -713,7 +1343,7 @@ def get_latest_checkpoint_serial(checkpoint_dir): """ get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory - :param checkpoint_dir + : param checkpoint_dir """ if not checkpoint_dir: return -1 @@ -744,3 +1374,101 @@ def get_latest_checkpoint_serial(checkpoint_dir): if success_num > current_dir: current_dir = success_num return current_dir + + +def get_test_program(filelist, program=None, startup_program=None): + """ + Transpile current train program to a program to read test dataset + if the program is using reader ops like "open_files_op". + """ + + def _copy_reader_var_(block, var, new_name=None): + if new_name == None: + new_name = var.name + new_var = block.create_var( + name=str(new_name), type=core.VarDesc.VarType.READER) + new_var.desc.set_shapes(var.desc.shapes()) + new_var.desc.set_dtypes(var.desc.dtypes()) + new_var.persistable = True + return new_var + + def _get_test_reader_name(train_reader_name): + return train_reader_name + "_test" + + def _is_reader_op(op): + block = op.block + if "Out" in op.output_names: + reader_out = block.vars[op.output("Out")[0]] + if reader_out.type == core.VarDesc.VarType.READER: + return True + return False + + if program == None: + program = default_main_program() + if startup_program == None: + startup_program = default_startup_program() + startup_block = startup_program.global_block() + + # 1. find out the orignal reader var name + startup_reader_op_list = [] + + for op in startup_block.ops: + if _is_reader_op(op): + startup_reader_op_list.append(op) + + if len(startup_reader_op_list) == 0: + return program + + root_reader_op = startup_reader_op_list[0] + train_test_reader_map = {} + # 2. add operators to startup to read open and read test data files + for op in startup_reader_op_list: + assert (len(op.output("Out")) == 1) + train_reader_name = op.output("Out")[0] + train_reader = startup_block.vars[train_reader_name] + test_reader = _copy_reader_var_( + startup_block, + train_reader, + new_name=_get_test_reader_name(train_reader_name)) + train_test_reader_map[train_reader.name] = test_reader + + test_op_inputs = {} + for name in op.input_names: + train_arg_names = op.input(name) + test_arg_vars = [] + for arg_name in train_arg_names: + arg_var = train_test_reader_map[ + arg_name] if name == "UnderlyingReader" else startup_block.vars[ + arg_name] + test_arg_vars.append(arg_var) + test_op_inputs[name] = test_arg_vars + + test_op = startup_block.append_op( + type=op.type, + inputs=test_op_inputs, + outputs={'Out': [test_reader]}, + attrs=op.attrs) + # root reader op's filelist attr for read test files + if op.type == root_reader_op.type: + test_op.set_attr("file_names", filelist) + if op.type == "create_multi_pass_reader": + test_op.set_attr("pass_num", 1) + + # 3. rename reader vars in inference program to different name + # to avoid read from train data. + main_block = program.global_block() + for var in main_block.vars.values(): + if var.type == core.VarDesc.VarType.READER: + main_block.rename_var( + str(var.name), str(_get_test_reader_name(var.name))) + + for op in main_block.ops: + if op.type == root_reader_op.type: + test_op.set_attr("file_names", filelist) + if op.type == "create_multi_pass_reader": + test_op.set_attr("pass_num", 1) + + startup_program.sync_with_cpp() + program.sync_with_cpp() + + return program diff --git a/python/paddle/fluid/layers/__init__.py b/python/paddle/fluid/layers/__init__.py index a568f61dcb..cd1492da24 100644 --- a/python/paddle/fluid/layers/__init__.py +++ b/python/paddle/fluid/layers/__init__.py @@ -28,8 +28,8 @@ import math_op_patch from math_op_patch import * import detection from detection import * -import metric -from metric import * +import metric_op +from metric_op import * from learning_rate_scheduler import * __all__ = [] @@ -41,5 +41,5 @@ __all__ += control_flow.__all__ __all__ += ops.__all__ __all__ += device.__all__ __all__ += detection.__all__ -__all__ += metric.__all__ +__all__ += metric_op.__all__ __all__ += learning_rate_scheduler.__all__ diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index 4db085e9f5..849474dc58 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -20,13 +20,13 @@ from ..framework import Program, Variable, Operator from ..layer_helper import LayerHelper, unique_name from ..initializer import force_init_on_cpu from ops import logical_and, logical_not, logical_or +import numpy __all__ = [ 'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard', 'BlockGuardWithCompletion', - 'StaticRNNMemoryLink', 'WhileGuard', 'While', 'Switch', @@ -55,34 +55,36 @@ __all__ = [ def split_lod_tensor(input, mask, level=0): """ - **split_lod_tensor** - This function takes in an input that contains the complete lod information, and takes in a mask which is used to mask certain parts of the input. The output is the true branch and the false branch with the mask applied to - the input at a certain level in the tensor. + the input at a certain level in the tensor. Mainly used in IfElse to split + data into two parts. Args: input(tuple|list|None): The input tensor that contains complete lod information needed to construct the output. mask(list): A bool column vector which masks the input. - level(int): The specific lod level to rank. + level(int): The specific lod level to split. Returns: - Variable: The true branch of tensor as per the mask applied to input. - Variable: The false branch of tensor as per the mask applied to input. + tuple(Variable, Variable): + The true branch of tensor as per the mask applied to input. + + The false branch of tensor as per the mask applied to input. Examples: .. code-block:: python - x = layers.data(name='x', shape=[1]) + x = fluid.layers.data(name='x', shape=[1]) x.persistable = True - y = layers.data(name='y', shape=[1]) + y = fluid.layers.data(name='y', shape=[1]) y.persistable = True - out_true, out_false = layers.split_lod_tensor( + out_true, out_false = fluid.layers.split_lod_tensor( input=x, mask=y, level=level) + """ helper = LayerHelper('split_lod_tensor', **locals()) out_true = helper.create_tmp_variable(dtype=input.dtype) @@ -105,8 +107,9 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0): This function takes in an input :math:`x`, the True branch, the False branch and a binary :math:`mask`. Using this information, this function - merges the True and False branches of the tensor into a single Output - at a certain lod level indiacted by :math:`level`. + merges the True and False branches of the tensor into a single tensor as + output at a certain lod level indicated by :math:`level`. Used in IfElse + to merge the output if True block and False Block. Args: in_true(tuple|list|None): The True branch to be merged. @@ -114,7 +117,7 @@ def merge_lod_tensor(in_true, in_false, x, mask, level=0): x(tuple|list|None): The input tensor that contains complete lod information needed to construct the output. mask(list): A bool column vector which masks the input. - level(int): The specific lod level to rank. + level(int): The specific lod level to merge. Returns: Variable: The merged output tensor. @@ -182,12 +185,14 @@ def Print(input, Returns: Variable: Output tensor, same data with input tensor. + Examples: + .. code-block:: python - value = some_layer(...) - Print(value, summarize=10, - message="The content of some_layer: ") + value = some_layer(...) + Print(value, summarize=10, + message="The content of some_layer: ") ''' helper = LayerHelper('print', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -233,9 +238,56 @@ class BlockGuard(object): class ParallelDo(object): """ - ParallelDo class. + ParallelDo is used to represent multi-thread data parallel processing. + + Its vanilla implementation can be shown as the following (:math:`|` means + single thread and :math:`||||` means multiple threads) + + .. code-block:: text + + In the forward pass + | Split input onto different devices + | Copy parameter onto different devices + |||| Compute forward pass in parallel + | Merge output from different devices - ParallelDo class is used to create a ParallelDo. + In the backward pass + | Split output@grad onto different devices + |||| Compute backward pass in parallel + | accumulate param@grad from different devices to the first device + | Merge input@grad from different devices + | Copy param@grad to the place of parallel_do_op + + Examples: + + .. code-block:: python + + images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # ParallelDo version & Single-thread version + if thread_num > 1: + places = fluid.layers.get_places(thread_num) + pd = fluid.layers.ParallelDo(places) + with pd.do(): + images = pd.read_input(images) + label = pd.read_input(label) + predict = cnn_model(images) + cost = fluid.layers.cross_entropy(input=predict, label=label) + + avg_cost = fluid.layers.mean(x=cost) + pd.write_output(avg_cost) + + avg_cost = pd() + avg_cost = fluid.layers.mean(avg_cost) + else: + predict = cnn_model(images) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + .. warning:: + + It will be soon deprecated, please use ParallelExecutor instead. """ def __init__(self, places, use_nccl=False, name=None): @@ -362,16 +414,17 @@ class StaticRNNMemoryLink(object): """ StaticRNNMemoryLink class. - Args: - init: the initial variable for Memory - init: Variable - pre_mem: the memory variable in previous time step - pre_mem: Variable - mem: the memory variable in current time step - mem: Variable - StaticRNNMemoryLink class is used to create a link between two memory cells of a StaticRNN. + + + NOTE: This is a internal data structure of a very low-level API. + Please use StaticRNN instead. + + Args: + init(Variable): the initial variable for Memory. + pre_mem(Variable): the memory variable in previous time step. + mem(Variable): the memory variable in current time step. """ def __init__(self, init, pre_mem, mem=None): @@ -606,6 +659,29 @@ class WhileGuard(BlockGuard): class While(object): + """ + while loop control flow. + + Args: + cond (Variable): condition used to compare. + name (str): The name of this layer. + + Examples: + .. code-block:: python + + d0 = layers.data("d0", shape=[10], dtype='float32') + data_array = layers.array_write(x=d0, i=i) + array_len = layers.fill_constant(shape=[1],dtype='int64', value=3) + + cond = layers.less_than(x=i, y=array_len) + while_op = layers.While(cond=cond) + with while_op.block(): + d = layers.array_read(array=data_array, i=i) + i = layers.increment(x=i, in_place=True) + layers.array_write(result, i=i, array=d) + layers.less_than(x=i, y=array_len, cond=cond) + """ + BEFORE_WHILE_BLOCK = 0 IN_WHILE_BLOCK = 1 AFTER_WHILE_BLOCK = 2 @@ -675,8 +751,8 @@ def lod_rank_table(x, level=0): .. code-block:: text x is a LoDTensor: - x.lod = [[0, 2, 3], - [0, 5, 6, 7]] + x.lod = [[2, 1], + [5, 1, 1]] x.data = [a, b, c, d, e, f, g] 1. set level to 0: @@ -706,7 +782,7 @@ def lod_rank_table(x, level=0): .. code-block:: python x = fluid.layers.data(name='x', shape=[10], - dtype='float32', lod_level=1) + dtype='float32', lod_level=1) out = layers.lod_rank_table(x=x, level=0) """ helper = LayerHelper("lod_rank_table", **locals()) @@ -748,17 +824,25 @@ def max_sequence_len(rank_table): def lod_tensor_to_array(x, table): - """ Convert a LOD_TENSOR to an LOD_TENSOR_ARRAY. + """ + Convert a LoDTensor to a LoDTensorArray. + + This function split a LoDTesnor to a LoDTensorArray according to its LoD + information. LoDTensorArray is an alias of C++ std::vector in + PaddlePaddle. The generated LoDTensorArray of this function can be further read + or written by `read_from_array()` and `write_to_array()` operators. However, + this function is generally an internal component of PaddlePaddle `DynamicRNN`. + Users should not use it directly. Args: - x (Variable|list): The LOD tensor to be converted to a LOD tensor array. + x (Variable|list): The LoDTensor to be converted to a LoDTensorArray. table (ParamAttr|list): The variable that stores the level of lod which is ordered by sequence length in - descending order. + descending order. It is generally generated + by `layers.lod_rank_table()` API. Returns: - Variable: The variable of type array that has been converted from a - tensor. + Variable: The LoDTensorArray that has been converted from the input tensor. Examples: .. code-block:: python @@ -823,8 +907,7 @@ def increment(x, value=1.0, in_place=True): in_place (bool): If the increment should be performed in-place. Returns: - Variable: The tensor variable storing the transformation of - element-wise increment of each value in the input. + Variable: The elementwise-incremented object. Examples: .. code-block:: python @@ -866,7 +949,7 @@ def array_write(x, i, array=None): Variable: The output LOD_TENSOR_ARRAY where the input tensor is written. Examples: - .. code-block::python + .. code-block:: python tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) @@ -887,14 +970,17 @@ def array_write(x, i, array=None): def create_array(dtype): - """This function creates an array of type :math:`LOD_TENSOR_ARRAY` using the - LayerHelper. + """ + **Create LoDTensorArray** + + This function creates an array of LOD_TENSOR_ARRAY . It is mainly used to + implement RNN with array_write, array_read and While. Args: - dtype (int|float): The data type of the elements in the array. + dtype (int|float): The data type of the elements in the lod_tensor_array. Returns: - Variable: The tensor variable storing the elements of data type. + Variable: The lod_tensor_array variable storing the elements of data type. Examples: .. code-block:: python @@ -909,37 +995,40 @@ def create_array(dtype): dtype=dtype) -def less_than(x, y, force_cpu=True, cond=None, **ignored): +@templatedoc() +def less_than(x, y, force_cpu=None, cond=None, **ignored): """ - **Less than** + ${comment} - This layer returns the truth value of :math:`x < y` elementwise. + >>> import paddle.fluid as fluid + >>> less = fluid.layers.less_than(x=label, y=limit) Args: - x(Variable): First operand of *less_than* - y(Variable): Second operand of *less_than* - force_cpu(Bool|True): The output data will be on CPU if set true. + x(${x_type}): ${x_comment}. + y(${y_type}): ${y_comment}. + force_cpu(${force_cpu_type}): ${force_cpu_comment}. cond(Variable|None): Optional output variable to store the result of *less_than* Returns: - Variable: The tensor variable storing the output of *less_than*. - - Examples: - .. code-block:: python - - less = fluid.layers.less_than(x=label, y=limit) + ${out_comment}. """ helper = LayerHelper("less_than", **locals()) if cond is None: cond = helper.create_tmp_variable(dtype='bool') cond.stop_gradient = True + attrs = dict() + if force_cpu is not None: + attrs['force_cpu'] = force_cpu + elif force_init_on_cpu(): + attrs['force_cpu'] = force_init_on_cpu() + helper.append_op( type='less_than', inputs={'X': [x], 'Y': [y]}, outputs={'Out': [cond]}, - attrs={'force_cpu': force_cpu or force_init_on_cpu()}) + attrs=attrs) return cond @@ -974,16 +1063,34 @@ def equal(x, y, cond=None, **ignored): def array_read(array, i): - """This function performs the operation to read the data in as an + """ + This function performs the operation to read the data in as an LOD_TENSOR_ARRAY. + + .. code-block:: text + + Given: + + array = [0.6, 0.1, 0.3, 0.1] + + And: + + i = 2 + + Then: + + output = 0.3 + Args: - array (Variable|list): The input tensor that will be written to an array. - i (Variable|list): The subscript index in tensor array, that points the - place where data will be written to. + array (Variable|list): The input tensor that store data to be read. + i (Variable|list): The index of the data to be read from input array. + Returns: Variable: The tensor type variable that has the data written to it. + Examples: - .. code-block::python + .. code-block:: python + tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) arr = layers.array_read(tmp, i=i) @@ -1004,8 +1111,28 @@ def array_read(array, i): def shrink_memory(x, i, table): """ - This function creates an operator to shrink_rnn_memory using the RankTable + This function creates an operator to shrink rnn memory using the RankTable as mentioned in the input parameter. + + NOTE: This API is very low-level API. It is used by DynamicRNN only. + + Since the Dynamic RNN uses no-padding way to implement RNN. The sequence + will be sorted by order, and the length of valid memory will be shrink after + each time step. + + Args: + x(Variable): The memory object in the previous time step. + i(Variable): The step count variable. A int scalar as LoDTensor. + table(Variable): The RNNRankTable object. + + Returns: + the memory variable after shrink. + + Examples: + + Since this API is very low level API. The example is not provided. + Please reference the implementation of class DynamicRNN for detail + usage. """ helper = LayerHelper('shrink_memory', **locals()) out = helper.create_tmp_variable(dtype=x.dtype) @@ -1020,9 +1147,14 @@ def shrink_memory(x, i, table): def array_length(array): - """This function performs the operation to find the length of the input + """ + **Get the Length of Input LoDTensorArray** + + This function performs the operation to find the length of the input LOD_TENSOR_ARRAY. + Related API: array_read, array_write, While. + Args: array (LOD_TENSOR_ARRAY): The input array that will be used to compute the length. @@ -1031,12 +1163,13 @@ def array_length(array): Variable: The length of the input LoDTensorArray. Examples: - .. code-block::python + .. code-block:: python tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) arr = fluid.layers.array_write(tmp, i=i) arr_len = fluid.layers.array_length(arr) + """ helper = LayerHelper('array_length', **locals()) tmp = helper.create_tmp_variable(dtype='int64') @@ -1047,6 +1180,13 @@ def array_length(array): class ConditionalBlockGuard(BlockGuard): + """ + ConditionalBlockGuard is derived from BlockGuard. It is dedicated for + holding a ConditionalBlock, and helping users entering and exiting the + ConditionalBlock via Python's 'with' keyword. However, ConditionalBlockGuard + is generally an internal component of IfElse, users should not use it directly. + """ + def __init__(self, block): if not isinstance(block, ConditionalBlock): raise TypeError("block should be conditional block") @@ -1063,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard): class ConditionalBlock(object): + ''' + **ConditionalBlock** + + ConditionalBlock is an operator that bind a block to a specific condition, + if the condition matches, the corresponding block will be executed. + + Args: + inputs (Variable): bool conditions. + is_scalar_condition (bool): whether the branch is controled by a scalar. + name(str): name of this ConditionalBlock. + + Examples: + .. code-block:: python + + cond = layers.less_than(x=label, y=limit) + true_image, false_image = layers.split_lod_tensor( + input=image, mask=cond) + true_cond = layers.ConditionalBlock([true_image]) + + with true_cond.block(): + ... + with false_cond.block(): + ... + ''' + def __init__(self, inputs, is_scalar_condition=False, name=None): for each_input in inputs: if not isinstance(each_input, Variable): @@ -1120,6 +1285,42 @@ class ConditionalBlock(object): class Switch(object): + """ + Switch class works just like a `if-elif-else`. Can be used in learning rate scheduler + to modify learning rate + + The Semantics: + + 1. A `switch` control-flow checks cases one-by-one. + + 2. The condition of each case is a boolean value, which is a scalar Variable. + + 3. It runs the first matched case, or the default case if there is one. + + 4. Once it matches a case, it runs the corresponding branch and only that branch. + + Examples: + .. code-block:: python + + lr = fluid.layers.tensor.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") + one_var = tensor.fill_constant( + shape=[1], dtype='float32', value=1.0) + two_var = tensor.fill_constant( + shape=[1], dtype='float32', value=2.0) + + with fluid.layers.control_flow.Switch() as switch: + with switch.case(global_step == zero_var): + fluid.layers.tensor.assign(input=one_var, output=lr) + with switch.default(): + fluid.layers.tensor.assign(input=two_var, output=lr) + + """ + def __init__(self, name=None): self.helper = LayerHelper('switch', name=name) self.inside_scope = False @@ -1149,7 +1350,8 @@ class Switch(object): return ConditionalBlockGuard(cond_block) def default(self): - """create a default case for this switch + """ + create a default case for this switch """ pre_cond_num = len(self.pre_not_conditions) if pre_cond_num == 0: @@ -1339,6 +1541,38 @@ class IfElse(object): class DynamicRNN(object): + """ + The dynamic RNN can process a batch of sequence data. The length of each + sample sequence can be different. This API automatically process them in + batch. + + The input lod must be set. Please reference `lod_tensor` + + >>> import paddle.fluid as fluid + >>> data = fluid.layers.data(name='sentence', dtype='int64', lod_level=1) + >>> embedding = fluid.layers.embedding(input=data, size=[65535, 32], + >>> is_sparse=True) + >>> + >>> drnn = fluid.layers.DynamicRNN() + >>> with drnn.block(): + >>> word = drnn.step_input(embedding) + >>> prev = drnn.memory(shape=[200]) + >>> hidden = fluid.layers.fc(input=[word, prev], size=200, act='relu') + >>> drnn.update_memory(prev, hidden) # set prev to hidden + >>> drnn.output(hidden) + >>> + >>> # last is the last time step of rnn. It is the encoding result. + >>> last = fluid.layers.sequence_last_step(drnn()) + + The dynamic RNN will unfold sequence into timesteps. Users need to define + how to process each time step during the :code:`with` block. + + The `memory` is used staging data cross time step. The initial value of + memory can be zero or another variable. + + The dynamic RNN can mark multiple variables as its output. Use `drnn()` to + get the output sequence. + """ BEFORE_RNN = 0 IN_RNN = 1 AFTER_RNN = 2 @@ -1361,6 +1595,15 @@ class DynamicRNN(object): self.mem_link = [] def step_input(self, x): + """ + Mark a sequence as a dynamic RNN input. + Args: + x(Variable): The input sequence. + + Returns: + The current timestep in the input sequence. + + """ self._assert_in_rnn_block_("step_input") if not isinstance(x, Variable): raise TypeError( @@ -1404,6 +1647,15 @@ class DynamicRNN(object): return array_read(array=input_array, i=self.step_idx) def static_input(self, x): + """ + Mark a variable as a RNN input. The input will not be scattered into + time steps. + Args: + x(Variable): The input variable. + + Returns: + The input variable that can access in RNN. + """ self._assert_in_rnn_block_("static_input") if not isinstance(x, Variable): raise TypeError( @@ -1425,6 +1677,10 @@ class DynamicRNN(object): @contextlib.contextmanager def block(self): + """ + The block for user to define operators in RNN. See the class docstring + for more details. + """ if self.status != DynamicRNN.BEFORE_RNN: raise ValueError("rnn.block() can only be invoke once") self.step_idx = fill_constant( @@ -1451,6 +1707,9 @@ class DynamicRNN(object): x=each_array, table=self.lod_rank_table)) def __call__(self, *args, **kwargs): + """ + Get the output of RNN. This API should only be invoked after RNN.block() + """ if self.status != DynamicRNN.AFTER_RNN: raise ValueError(("Output of the dynamic RNN can only be visited " "outside the rnn block.")) @@ -1465,6 +1724,70 @@ class DynamicRNN(object): value=0.0, need_reorder=False, dtype='float32'): + """ + Create a memory variable for dynamic rnn. + + If the :code:`init` is not None, :code:`memory` will be initialized by + this variable. The :code:`need_reorder` is used to reorder the memory as + the input variable. It should be set to true when the initialized memory + depends on the input sample. + + For example, + + >>> import paddle.fluid as fluid + >>> sentence = fluid.layers.data( + >>> name='sentence', dtype='float32', shape=[32]) + >>> boot_memory = fluid.layers.data( + >>> name='boot', dtype='float32', shape=[10]) + >>> + >>> drnn = fluid.layers.DynamicRNN() + >>> with drnn.block(): + >>> word = drnn.step_input(sentence) + >>> memory = drnn.memory(init=boot_memory, need_reorder=True) + >>> hidden = fluid.layers.fc( + >>> input=[word, memory], size=10, act='tanh') + >>> drnn.update_memory(ex_mem=memory, new_mem=hidden) + >>> drnn.output(hidden) + >>> rnn_output = drnn() + + + Otherwise, if :code:`shape`, :code:`value`, :code:`dtype` are set, the + :code:`memory` will be initialized by this :code:`value`. + + For example, + + >>> import paddle.fluid as fluid + >>> sentence = fluid.layers.data( + >>> name='sentence', dtype='float32', shape=[32]) + >>> + >>> drnn = fluid.layers.DynamicRNN() + >>> with drnn.block(): + >>> word = drnn.step_input(sentence) + >>> memory = drnn.memory(shape=[10], dtype='float32', value=0) + >>> hidden = fluid.layers.fc( + >>> input=[word, memory], size=10, act='tanh') + >>> drnn.update_memory(ex_mem=memory, new_mem=hidden) + >>> drnn.output(hidden) + >>> rnn_output = drnn() + + + Args: + init(Variable|None): The initialized variable. + + shape(list|tuple): The memory shape. NOTE the shape does not contain + batch_size. + + value(float): the initalized value. + + need_reorder(bool): True if the initialized memory depends on the + input sample. + + dtype(str|numpy.dtype): The data type of the initialized memory. + + Returns: + the memory variable. + + """ self._assert_in_rnn_block_('memory') if init is not None: if not isinstance(init, Variable): @@ -1532,6 +1855,16 @@ class DynamicRNN(object): return self.memory(init=init) def update_memory(self, ex_mem, new_mem): + """ + Update the memory from ex_mem to new_mem. NOTE that the shape and data + type of :code:`ex_mem` and :code:`new_mem` must be same. + Args: + ex_mem(Variable): the memory variable. + new_mem(Variable): the plain variable generated in RNN block. + + Returns: + None + """ self._assert_in_rnn_block_('update_memory') if not isinstance(ex_mem, Variable): raise TypeError("The input arg `ex_mem` of update_memory() must " @@ -1549,6 +1882,15 @@ class DynamicRNN(object): self.mem_link.append((new_mem, mem_array)) def output(self, *outputs): + """ + mark the RNN output variables. + + Args: + outputs: The output variables. + + Returns: + None + """ self._assert_in_rnn_block_('output') parent_block = self._parent_block_() for each in outputs: @@ -1591,26 +1933,26 @@ def reorder_lod_tensor_by_rank(x, rank_table): def is_empty(x, cond=None, **ignored): """ - **Is Empty** - - This layer returns the truth value of whether the variable is empty. + Test whether a Variable is empty. Args: - x(Variable): Operand of *is_empty* - cond(Variable|None): Optional output variable to store the result - of *is_empty* + x (Variable): The Variable to be tested. + cond (Variable|None): Output parameter. Returns the test result + of given 'x'. Default: None Returns: - Variable: The tensor variable storing the output of *is_empty*. + Variable: A bool scalar. True if 'x' is an empty Variable. Raises: TypeError: If input cond is not a variable, or cond's dtype is - not bool + not bool. Examples: .. code-block:: python - less = fluid.layers.is_empty(x=input) + res = fluid.layers.is_empty(x=input) + # or: + fluid.layers.is_empty(x=input, cond=res) """ helper = LayerHelper("is_empty", **locals()) if cond is None: diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 3a83db12fd..6af01297df 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -16,7 +16,7 @@ All layers just related to the detection neural network. """ from layer_function_generator import generate_layer_fn -from layer_function_generator import autodoc +from layer_function_generator import autodoc, templatedoc from ..layer_helper import LayerHelper import tensor import nn @@ -30,6 +30,7 @@ __all__ = [ 'detection_output', 'ssd_loss', 'detection_map', + 'anchor_generator', ] __auto__ = [ @@ -97,7 +98,9 @@ def detection_output(loc, nms_eta(float): The parameter for adaptive NMS. Returns: - Variable: The detection outputs is a LoDTensor with shape [No, 6]. + Variable: + + The detection outputs is a LoDTensor with shape [No, 6]. Each row has six values: [label, confidence, xmin, ymin, xmax, ymax]. `No` is the total number of detections in this mini-batch. For each instance, the offsets in first dimension are called LoD, the offset @@ -110,15 +113,15 @@ def detection_output(loc, Examples: .. code-block:: python - pb = layers.data(name='prior_box', shape=[10, 4], + pb = layers.data(name='prior_box', shape=[10, 4], append_batch_size=False, dtype='float32') - pbv = layers.data(name='prior_box_var', shape=[10, 4], + pbv = layers.data(name='prior_box_var', shape=[10, 4], append_batch_size=False, dtype='float32') - loc = layers.data(name='target_box', shape=[2, 21, 4], + loc = layers.data(name='target_box', shape=[2, 21, 4], append_batch_size=False, dtype='float32') - scores = layers.data(name='scores', shape=[2, 21, 10], + scores = layers.data(name='scores', shape=[2, 21, 10], append_batch_size=False, dtype='float32') - nmsed_outs = fluid.layers.detection_output(scores=scores, + nmsed_outs = fluid.layers.detection_output(scores=scores, loc=loc, prior_box=pb, prior_box_var=pbv) @@ -153,7 +156,7 @@ def detection_output(loc, return nmsed_outs -@autodoc() +@templatedoc() def detection_map(detect_res, label, class_num, @@ -164,6 +167,47 @@ def detection_map(detect_res, input_states=None, out_states=None, ap_version='integral'): + """ + ${comment} + + Args: + detect_res: ${detect_res_comment} + label: ${label_comment} + class_num: ${class_num_comment} + background_label: ${background_label_comment} + overlap_threshold: ${overlap_threshold_comment} + evaluate_difficult: ${evaluate_difficult_comment} + has_state: ${has_state_comment} + input_states: If not None, It contains 3 elements: + 1. pos_count ${pos_count_comment}. + 2. true_pos ${true_pos_comment}. + 3. false_pos ${false_pos_comment}. + out_states: If not None, it contains 3 elements. + 1. accum_pos_count ${accum_pos_count_comment}. + 2. accum_true_pos ${accum_true_pos_comment}. + 3. accum_false_pos ${accum_false_pos_comment}. + ap_version: ${ap_type_comment} + + Returns: + ${map_comment} + + + Examples: + .. code-block:: python + + detect_res = fluid.layers.data( + name='detect_res', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + label = fluid.layers.data( + name='label', + shape=[10, 6], + append_batch_size=False, + dtype='float32') + + map_out = fluid.layers.detection_map(detect_res, label, 21) + """ helper = LayerHelper("detection_map", **locals()) def __create_var(type): @@ -210,53 +254,68 @@ def bipartite_match(dist_matrix, dist_threshold=None, name=None): """ - **Bipartite matchint operator** - - This operator is a greedy bipartite matching algorithm, which is used to - obtain the matching with the maximum distance based on the input + This operator implements a greedy bipartite matching algorithm, which is + used to obtain the matching with the maximum distance based on the input distance matrix. For input 2D matrix, the bipartite matching algorithm can - find the matched column for each row, also can find the matched row for - each column. And this operator only calculate matched indices from column - to row. For each instance, the number of matched indices is the number of - of columns of the input ditance matrix. - - There are two outputs to save matched indices and distance. - A simple description, this algothrim matched the best (maximum distance) + find the matched column for each row (matched means the largest distance), + also can find the matched row for each column. And this operator only + calculate matched indices from column to row. For each instance, + the number of matched indices is the column number of the input distance + matrix. + + There are two outputs, matched indices and distance. + A simple description, this algorithm matched the best (maximum distance) row entity to the column entity and the matched indices are not duplicated in each row of ColToRowMatchIndices. If the column entity is not matched any row entity, set -1 in ColToRowMatchIndices. - Please note that the input DistMat can be LoDTensor (with LoD) or Tensor. + NOTE: the input DistMat can be LoDTensor (with LoD) or Tensor. If LoDTensor with LoD, the height of ColToRowMatchIndices is batch size. If Tensor, the height of ColToRowMatchIndices is 1. + NOTE: This API is a very low level API. It is used by :code:`ssd_loss` + layer. Please consider to use :code:`ssd_loss` instead. + Args: dist_matrix(Variable): This input is a 2-D LoDTensor with shape [K, M]. It is pair-wise distance matrix between the entities represented by each row and each column. For example, assumed one entity is A with shape [K], another entity is B with shape [M]. The - dist_matirx[i][j] is the distance between A[i] and B[j]. The bigger - the distance is, the better macthing the pairs are. Please note, - This tensor can contain LoD information to represent a batch of - inputs. One instance of this batch can contain different numbers of - entities. + dist_matrix[i][j] is the distance between A[i] and B[j]. The bigger + the distance is, the better matching the pairs are. + + NOTE: This tensor can contain LoD information to represent a batch + of inputs. One instance of this batch can contain different numbers + of entities. match_type(string|None): The type of matching method, should be - 'bipartite' or 'per_prediction', 'bipartite' by defalut. + 'bipartite' or 'per_prediction'. [default 'bipartite']. dist_threshold(float|None): If `match_type` is 'per_prediction', this threshold is to determine the extra matching bboxes based - on the maximum distance, 0.5 by defalut. + on the maximum distance, 0.5 by default. Returns: - match_indices(Variable): A 2-D Tensor with shape [N, M] in int type. - N is the batch size. If match_indices[i][j] is -1, it - means B[j] does not match any entity in i-th instance. - Otherwise, it means B[j] is matched to row - match_indices[i][j] in i-th instance. The row number of - i-th instance is saved in match_indices[i][j]. - match_distance(Variable): A 2-D Tensor with shape [N, M] in float type. - N is batch size. If match_indices[i][j] is -1, - match_distance[i][j] is also -1.0. Otherwise, assumed - match_distance[i][j] = d, and the row offsets of each instance - are called LoD. Then match_distance[i][j] = dist_matrix[d+LoD[i]][j]. + tuple: a tuple with two elements is returned. The first is + matched_indices, the second is matched_distance. + + The matched_indices is a 2-D Tensor with shape [N, M] in int type. + N is the batch size. If match_indices[i][j] is -1, it + means B[j] does not match any entity in i-th instance. + Otherwise, it means B[j] is matched to row + match_indices[i][j] in i-th instance. The row number of + i-th instance is saved in match_indices[i][j]. + + The matched_distance is a 2-D Tensor with shape [N, M] in float type + . N is batch size. If match_indices[i][j] is -1, + match_distance[i][j] is also -1.0. Otherwise, assumed + match_distance[i][j] = d, and the row offsets of each instance + are called LoD. Then match_distance[i][j] = + dist_matrix[d+LoD[i]][j]. + + Examples: + + >>> x = fluid.layers.data(name='x', shape=[4], dtype='float32') + >>> y = fluid.layers.data(name='y', shape=[4], dtype='float32') + >>> iou = fluid.layers.iou_similarity(x=x, y=y) + >>> matched_indices, matched_dist = fluid.layers.bipartite_match(iou) """ helper = LayerHelper('bipartite_match', **locals()) match_indices = helper.create_tmp_variable(dtype='int32') @@ -281,8 +340,6 @@ def target_assign(input, mismatch_value=None, name=None): """ - **Target assigner operator** - This operator can be, for given the target bounding boxes or labels, to assign classification and regression targets to each prediction as well as weights to prediction. The weights is used to specify which prediction would @@ -296,20 +353,24 @@ def target_assign(input, 1. Assigning all outpts based on `match_indices`: - If id = match_indices[i][j] > 0, + .. code-block:: text + + If id = match_indices[i][j] > 0, - out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] - out_weight[i][j] = 1. + out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] + out_weight[i][j] = 1. - Otherwise, + Otherwise, - out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} - out_weight[i][j] = 0. + out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} + out_weight[i][j] = 0. 2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided: Assumed that the row offset for each instance in `neg_indices` is called neg_lod, for i-th instance and each `id` of neg_indices in this instance: + + .. code-block:: text out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} out_weight[i][id] = 1.0 @@ -326,10 +387,23 @@ def target_assign(input, mismatch_value (float32): Fill this value to the mismatched location. Returns: - out (Variable): The output is a 3D Tensor with shape [N, P, K], - N and P is the same as they are in `neg_indices`, K is the - same as it in input of X. If `match_indices[i][j]`. - out_weight (Variable): The weight for output with the shape of [N, P, 1]. + tuple: + + A tuple(out, out_weight) is returned. out is a 3D Tensor with + shape [N, P, K], N and P is the same as they are in + `neg_indices`, K is the same as it in input of X. If + `match_indices[i][j]`. out_weight is the weight for output with + the shape of [N, P, 1]. + + Examples: + + .. code-block:: python + + matched_indices, matched_dist = fluid.layers.bipartite_match(iou) + gt = layers.data( + name='gt', shape=[1, 1], dtype='int32', lod_level=1) + trg, trg_weight = layers.target_assign( + gt, matched_indices, mismatch_value=0) """ helper = LayerHelper('target_assign', **locals()) out = helper.create_tmp_variable(dtype=input.dtype) @@ -364,7 +438,7 @@ def ssd_loss(location, normalize=True, sample_size=None): """ - **Multi-box loss layer for object dection algorithm of SSD** + **Multi-box loss layer for object detection algorithm of SSD** This layer is to compute dection loss for SSD given the location offset predictions, confidence predictions, prior boxes and ground-truth boudding @@ -372,21 +446,35 @@ def ssd_loss(location, is a weighted sum of the localization loss (or regression loss) and confidence loss (or classification loss) by performing the following steps: - 1. Find matched boundding box by bipartite matching algorithm. + 1. Find matched bounding box by bipartite matching algorithm. + 1.1 Compute IOU similarity between ground-truth boxes and prior boxes. + 1.2 Compute matched boundding box by bipartite matching algorithm. + 2. Compute confidence for mining hard examples + 2.1. Get the target label based on matched indices. + 2.2. Compute confidence loss. + 3. Apply hard example mining to get the negative example indices and update the matched indices. + 4. Assign classification and regression targets + 4.1. Encoded bbox according to the prior boxes. + 4.2. Assign regression targets. + 4.3. Assign classification targets. + 5. Compute the overall objective loss. + 5.1 Compute confidence loss. + 5.1 Compute localization loss. + 5.3 Compute the overall weighted loss. Args: @@ -421,39 +509,36 @@ def ssd_loss(location, mining_type (str): The hard example mining type, should be 'hard_example' or 'max_negative', now only support `max_negative`. normalize (bool): Whether to normalize the SSD loss by the total number - of output locations, True by defalut. + of output locations, True by default. sample_size (int): The max sample size of negative box, used only when mining_type is 'hard_example'. Returns: - Variable: The weighted sum of the localization loss and confidence loss, - with shape [N * Np, 1], N and Np are the same as they are - in `location`. + The weighted sum of the localization loss and confidence loss, with \ + shape [N * Np, 1], N and Np are the same as they are in `location`. Raises: - ValueError: If mining_type is 'hard_example', now only support - mining type of `max_negative`. + ValueError: If mining_type is 'hard_example', now only support mining \ + type of `max_negative`. Examples: - .. code-block:: python - - pb = layers.data( - name='prior_box', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - pbv = layers.data( - name='prior_box_var', - shape=[10, 4], - append_batch_size=False, - dtype='float32') - loc = layers.data(name='target_box', shape=[10, 4], dtype='float32') - scores = layers.data(name='scores', shape=[10, 21], dtype='float32') - gt_box = layers.data( - name='gt_box', shape=[4], lod_level=1, dtype='float32') - gt_label = layers.data( - name='gt_label', shape=[1], lod_level=1, dtype='float32') - loss = layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv) + >>> pb = fluid.layers.data( + >>> name='prior_box', + >>> shape=[10, 4], + >>> append_batch_size=False, + >>> dtype='float32') + >>> pbv = fluid.layers.data( + >>> name='prior_box_var', + >>> shape=[10, 4], + >>> append_batch_size=False, + >>> dtype='float32') + >>> loc = fluid.layers.data(name='target_box', shape=[10, 4], dtype='float32') + >>> scores = fluid.layers.data(name='scores', shape=[10, 21], dtype='float32') + >>> gt_box = fluid.layers.data( + >>> name='gt_box', shape=[4], lod_level=1, dtype='float32') + >>> gt_label = fluid.layers.data( + >>> name='gt_label', shape=[1], lod_level=1, dtype='float32') + >>> loss = fluid.layers.ssd_loss(loc, scores, gt_box, gt_label, pb, pbv) """ helper = LayerHelper('ssd_loss', **locals()) @@ -577,7 +662,7 @@ def prior_box(input, offset=0.5, name=None): """ - **Prior box operator** + **Prior Box Operator** Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. Each position of the input produce N prior boxes, N is determined by @@ -606,26 +691,30 @@ def prior_box(input, name(str): Name of the prior box op. Default: None. Returns: - boxes(Variable): the output prior boxes of PriorBox. - The layout is [H, W, num_priors, 4]. - H is the height of input, W is the width of input, - num_priors is the total - box count of each position of input. - Variances(Variable): the expanded variances of PriorBox. - The layout is [H, W, num_priors, 4]. - H is the height of input, W is the width of input - num_priors is the total - box count of each position of input + tuple: A tuple with two Variable (boxes, variances) + + boxes: the output prior boxes of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input, + num_priors is the total + box count of each position of input. + + variances: the expanded variances of PriorBox. + The layout is [H, W, num_priors, 4]. + H is the height of input, W is the width of input + num_priors is the total + box count of each position of input Examples: .. code-block:: python - box, var = prior_box( - input=conv1, - image=images, - min_sizes=[100.], - flip=True, - clip=True) + + box, var = fluid.layers.prior_box( + input=conv1, + image=images, + min_sizes=[100.], + flip=True, + clip=True) """ helper = LayerHelper("prior_box", **locals()) dtype = helper.input_dtype() @@ -695,11 +784,9 @@ def multi_box_head(inputs, stride=1, name=None): """ - **Prior_boxes** - Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm. The details of this algorithm, please refer the - section 2.2 of SSD paper (SSD: Single Shot MultiBox Detector) + section 2.2 of SSD paper `SSD: Single Shot MultiBox Detector `_ . Args: @@ -740,24 +827,27 @@ def multi_box_head(inputs, name(str): Name of the prior box layer. Default: None. Returns: - mbox_loc(Variable): The predicted boxes' location of the inputs. - The layout is [N, H*W*Priors, 4]. where Priors - is the number of predicted boxes each position of each input. - mbox_conf(Variable): The predicted boxes' confidence of the inputs. - The layout is [N, H*W*Priors, C]. where Priors - is the number of predicted boxes each position of each input - and C is the number of Classes. - boxes(Variable): the output prior boxes of PriorBox. - The layout is [num_priors, 4]. num_priors is the total - box count of each position of inputs. - Variances(Variable): the expanded variances of PriorBox. - The layout is [num_priors, 4]. num_priors is the total - box count of each position of inputs + tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances) + + mbox_loc: The predicted boxes' location of the inputs. The layout + is [N, H*W*Priors, 4]. where Priors is the number of predicted + boxes each position of each input. + + mbox_conf: The predicted boxes' confidence of the inputs. The layout + is [N, H*W*Priors, C]. where Priors is the number of predicted boxes + each position of each input and C is the number of Classes. + + boxes: the output prior boxes of PriorBox. The layout is [num_priors, 4]. + num_priors is the total box count of each position of inputs. + + variances: the expanded variances of PriorBox. The layout is + [num_priors, 4]. num_priors is the total box count of each position of inputs Examples: .. code-block:: python - mbox_locs, mbox_confs, box, var = layers.multi_box_head( + + mbox_locs, mbox_confs, box, var = fluid.layers.multi_box_head( inputs=[conv1, conv2, conv3, conv4, conv5, conv5], image=images, num_classes=21, @@ -909,3 +999,95 @@ def multi_box_head(inputs, box.stop_gradient = True var.stop_gradient = True return mbox_locs_concat, mbox_confs_concat, box, var + + +def anchor_generator(input, + anchor_sizes=None, + aspect_ratios=None, + variance=[0.1, 0.1, 0.2, 0.2], + stride=None, + offset=0.5, + name=None): + """ + **Anchor generator operator** + + Generate anchors for Faster RCNN algorithm. + Each position of the input produce N anchors, N = + size(anchor_sizes) * size(aspect_ratios). The order of generated anchors + is firstly aspect_ratios loop then anchor_sizes loop. + + Args: + input(Variable): The input feature map, the format is NCHW. + anchor_sizes(list|tuple|float): The anchor sizes of generated anchors, + given in absolute pixels e.g. [64., 128., 256., 512.]. + For instance, the anchor size of 64 means the area of this anchor equals to 64**2. + aspect_ratios(list|tuple|float): The height / width ratios of generated + anchors, e.g. [0.5, 1.0, 2.0]. + variance(list|tuple): The variances to be used in box regression deltas. + Default:[0.1, 0.1, 0.2, 0.2]. + stride(list|turple): The anchors stride across width and height, + e.g. [16.0, 16.0] + offset(float): Prior boxes center offset. Default: 0.5 + name(str): Name of the prior box op. Default: None. + + Returns: + Anchors(Variable): The output anchors with a layout of [H, W, num_anchors, 4]. + H is the height of input, W is the width of input, + num_anchors is the box count of each position. + Each anchor is in (xmin, ymin, xmax, ymax) format an unnormalized. + Variances(Variable): The expanded variances of anchors + with a layout of [H, W, num_priors, 4]. + H is the height of input, W is the width of input + num_anchors is the box count of each position. + Each variance is in (xcenter, ycenter, w, h) format. + + + Examples: + + .. code-block:: python + + anchor, var = anchor_generator( + input=conv1, + anchor_sizes=[64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + variance=[0.1, 0.1, 0.2, 0.2], + stride=[16.0, 16.0], + offset=0.5) + """ + helper = LayerHelper("anchor_generator", **locals()) + dtype = helper.input_dtype() + + def _is_list_or_tuple_(data): + return (isinstance(data, list) or isinstance(data, tuple)) + + if not _is_list_or_tuple_(anchor_sizes): + anchor_sizes = [anchor_sizes] + if not _is_list_or_tuple_(aspect_ratios): + aspect_ratios = [aspect_ratios] + if not (_is_list_or_tuple_(stride) and len(stride) == 2): + raise ValueError('stride should be a list or tuple ', + 'with length 2, (stride_width, stride_height).') + + anchor_sizes = list(map(float, anchor_sizes)) + aspect_ratios = list(map(float, aspect_ratios)) + stride = list(map(float, stride)) + + attrs = { + 'anchor_sizes': anchor_sizes, + 'aspect_ratios': aspect_ratios, + 'variances': variance, + 'stride': stride, + 'offset': offset + } + + anchor = helper.create_tmp_variable(dtype) + var = helper.create_tmp_variable(dtype) + helper.append_op( + type="anchor_generator", + inputs={"Input": input}, + outputs={"Anchors": anchor, + "Variances": var}, + attrs=attrs, ) + anchor.stop_gradient = True + var.stop_gradient = True + return anchor, var diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index f3aeb6cd75..977abde21f 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -22,9 +22,9 @@ from ..executor import global_scope from layer_function_generator import generate_layer_fn, templatedoc __all__ = [ - 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', - 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', - 'random_data_generator', 'Preprocessor', 'load' + 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'Recv', + 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch', + 'double_buffer', 'random_data_generator', 'Preprocessor', 'load' ] @@ -109,10 +109,35 @@ class BlockGuardServ(BlockGuard): class ListenAndServ(object): """ - ListenAndServ class. + **ListenAndServ Layer** - ListenAndServ class is used to wrap listen_and_serv op to create a server - which can receive variables from clients and run a block. + ListenAndServ is used to create a rpc server bind and listen + on specific TCP port, this server will run the sub-block when + received variables from clients. + + Args: + endpoint(string): IP:port string which the server will listen on. + inputs(list): a list of variables that the server will get from clients. + fan_in(int): how many client are expected to report to this server, default: 1. + optimizer_mode(bool): whether to run the server as a parameter server, default: True. + + Examples: + .. code-block:: python + + with fluid.program_guard(main): + serv = layers.ListenAndServ( + "127.0.0.1:6170", ["X"], optimizer_mode=False) + with serv.do(): + x = layers.data( + shape=[32, 32], + dtype='float32', + name="X", + append_batch_size=False) + fluid.initializer.Constant(value=1.0)(x, main.global_block()) + layers.scale(x=x, scale=10.0, out=out_var) + + exe = fluid.Executor(place) + exe.run(main) """ def __init__(self, endpoint, inputs, fan_in=1, optimizer_mode=True): @@ -161,7 +186,6 @@ class ListenAndServ(object): main_program = self.helper.main_program current_block = main_program.current_block() parent_block = self.parent_block() - empty_block = Program().global_block() parent_block.append_op( type='listen_and_serv', @@ -170,25 +194,25 @@ class ListenAndServ(object): attrs={ 'endpoint': self.endpoint, 'Fanin': self.fan_in, - 'OptimizeBlock': current_block, - 'PrefetchBlock': empty_block, + 'optimize_blocks': [ + current_block + ], # did not support multiple optimize blocks in layers 'sync_mode': True, # did not support async now in layers 'grad_to_block_id': [""] }) -def Send(endpoints, send_vars, get_vars=None): +def Send(endpoints, send_vars, sync=True): """ - Send layer + Send variables to the server side, and get vars from server + side when server have finished running server side program. Args: - endpoints: comma seperated IP:PORT pairs in the order + endpoints (str): comma seperated IP:PORT pairs in the order of send_vars to send - send_vars: vars to send - get_vars: vars to get from server after send completes. + send_vars (list): variables to send to server + sync (bool): whether to wait the request finish - Send variables to the server side, and get vars from server - side when server have finished running server side program. """ assert (type(send_vars) == list) @@ -196,40 +220,33 @@ def Send(endpoints, send_vars, get_vars=None): endpoints = list(set(epmap)) helper = LayerHelper("Send", **locals()) - if not get_vars: - get_vars = [] - for s in send_vars: - v = helper.create_tmp_variable(dtype=s.dtype, stop_gradient=True) - get_vars.append(v) rpc_op_role_name = core.op_proto_and_checker_maker.kOpRoleAttrName() helper.append_op( type="send", inputs={"X": send_vars}, - outputs={"Out": get_vars}, attrs={ "endpoints": endpoints, "epmap": epmap, rpc_op_role_name: core.op_proto_and_checker_maker.OpRole.RPC }) - - return get_vars + if sync: + helper.append_op(type="send_barrier", attrs={"endpoints": endpoints}) -def Recv(endpoints, get_vars): +def Recv(endpoints, get_vars, sync=True): """ - Recv layer + Receive variables from server side Args: - endpoints: comma seperated IP:PORT pairs in the order + endpoints (str): comma seperated IP:PORT pairs in the order of send_vars to send - send_vars: vars to send - get_vars: vars to get from server after send completes. + get_vars (list): vars to get from server after send completes. + sync (bool): whether to wait the request finish - Send variables to the server side, and get vars from server - side when server have finished running server side program. + Returns: + list: list of received variables """ - assert (type(send_vars) == list) assert (type(get_vars) == list) epmap = endpoints.split(",") @@ -242,6 +259,9 @@ def Recv(endpoints, get_vars): outputs={"Out": get_vars}, attrs={"endpoints": endpoints, "epmap": epmap}) + if sync: + helper.append_op(type="fetch_barrier", attrs={"endpoints": endpoints}) + return get_vars def monkey_patch_reader_methods(reader): @@ -292,6 +312,7 @@ def _copy_reader_create_op_(block, op): return new_op +@templatedoc(op_type='create_recordio_file_reader') def open_recordio_file(filename, shapes, lod_levels, @@ -299,34 +320,30 @@ def open_recordio_file(filename, pass_num=1, for_parallel=True): """ - Open a RecordIO file - - This layer takes a RecordIO file to read from and returns a Reader Variable. - Via the Reader Variable, we can get data from the given RecordIO file. + ${comment} Args: - filename(str): The RecordIO file's name. + filename(${filename_type}): ${filename_comment}. shapes(list): List of tuples which declaring data shapes. - lod_levels(list): List of ints which declaring data lod_level. + lod_levels(${lod_levels_type}): ${lod_levels_comment}. dtypes(list): List of strs which declaring data type. pass_num(int): Number of passes to run. for_parallel(Bool): Set it as True if you are going to run subsequent operators in parallel. Returns: - Variable: A Reader Variable via which we can get RecordIO file data. + ${out_comment}. Examples: - .. code-block:: python - - reader = fluid.layers.io.open_recordio_file( - filename='./data.recordio', - shapes=[(3,224,224), (1)], - lod_levels=[0, 0], - dtypes=['float32', 'int64']) - # Via the reader, we can use 'read_file' layer to get data: - image, label = fluid.layers.io.read_file(reader) + >>> import paddle.fluid as fluid + >>> reader = fluid.layers.io.open_recordio_file( + >>> filename='./data.recordio', + >>> shapes=[(3,224,224), (1)], + >>> lod_levels=[0, 0], + >>> dtypes=['float32', 'int64']) + >>> # Via the reader, we can use 'read_file' layer to get data: + >>> image, label = fluid.layers.io.read_file(reader) """ dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] shape_concat = [] @@ -358,9 +375,6 @@ def open_recordio_file(filename, if pass_num > 1: main_prog_var = multi_pass(reader=main_prog_var, pass_num=pass_num) - if for_parallel: - main_prog_var = parallel(reader=main_prog_var) - return monkey_patch_reader_methods(main_prog_var) @@ -386,16 +400,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): Variable: A Reader Variable from which we can get random data. Examples: - .. code-block:: python - reader = fluid.layers.io.random_data_generator( - low=0.0, - high=1.0, - shapes=[(3,224,224), (1)], - lod_levels=[0, 0]) + .. code-block:: python - # Via the reader, we can use 'read_file' layer to get data: - image, label = fluid.layers.io.read_file(reader) + reader = fluid.layers.random_data_generator( + low=0.0, + high=1.0, + shapes=[[3,224,224], [1]], + lod_levels=[0, 0]) + # Via the reader, we can use 'read_file' layer to get data: + image, label = fluid.layers.read_file(reader) """ dtypes = [core.VarDesc.VarType.FP32] * len(shapes) shape_concat = [] @@ -452,10 +466,13 @@ def open_files(filenames, lod_levels(list): List of ints which declaring data lod_level. dtypes(list): List of strs which declaring data type. thread_num(int): The maximal concurrent prefetch thread number. - buffer_size(int): The size of prefetch buffer. + buffer_size(int|None): The size of prefetch buffer. If it is setted None, + buffer size will be thread_num * 3. + Default: None pass_num(int): Number of passes to run. for_parallel(Bool): Set it as True if you are going to run subsequent operators in parallel. + Default: True Returns: Variable: A Reader Variable via which we can get file data. @@ -475,7 +492,7 @@ def open_files(filenames, image, label = fluid.layers.io.read_file(reader) """ if buffer_size is None: - buffer_size = thread_num + buffer_size = thread_num * 3 if isinstance(filenames, basestring): filenames = [filenames] dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes] @@ -509,9 +526,6 @@ def open_files(filenames, main_prog_reader = multi_pass( reader=main_prog_reader, pass_num=pass_num) - if for_parallel: - main_prog_reader = parallel(reader=main_prog_reader) - return monkey_patch_reader_methods(main_prog_reader) @@ -544,16 +558,77 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): def shuffle(reader, buffer_size): + """ + Shuffle the reader. + """ return __create_unshared_decorated_reader__( 'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)}) def batch(reader, batch_size): + """ + This layer is a reader decorator. It takes a reader and adds + 'batching' decoration on it. When reading with the result + decorated reader, output data will be automatically organized + to the form of batches. + + Args: + reader(Variable): The reader to be decorated with 'batching'. + batch_size(int): The batch size. + + Returns: + Variable: The reader which has been decorated with 'batching'. + + Examples: + .. code-block:: python + + raw_reader = fluid.layers.io.open_files(filenames=['./data1.recordio', + './data2.recordio'], + shapes=[(3,224,224), (1)], + lod_levels=[0, 0], + dtypes=['float32', 'int64'], + thread_num=2, + buffer_size=2) + batch_reader = fluid.layers.batch(reader=raw_reader, batch_size=5) + + # If we read data with the raw_reader: + # data = fluid.layers.read_file(raw_reader) + # We can only get data instance by instance. + # + # However, if we read data with the batch_reader: + # data = fluid.layers.read_file(batch_reader) + # Each 5 adjacent instances will be automatically combined together + # to become a batch. So what we get('data') is a batch data instead + # of an instance. + """ return __create_unshared_decorated_reader__( 'create_batch_reader', reader, {'batch_size': int(batch_size)}) def double_buffer(reader, place=None, name=None): + """ + Wrap a double buffer reader. The data will copy to target place with a + double buffer queue. If the target place is None, the place that executor + perform on will be used. + + Args: + reader(Variable): the reader variable need to be wrapped. + place(Place): the place of target data. Default is the sample place of + executor perform. + + name(str): Variable name. None if the user does not care. + + Returns: + wrapped reader with double buffer. + + Examples: + + >>> reader = fluid.layers.open_files(filenames=['somefile'], + >>> shapes=[[-1, 784], [-1, 1]], + >>> dtypes=['float32', 'int64']) + >>> reader = fluid.layers.double_buffer(reader) + >>> img, label = fluid.layers.read_file(reader) + """ attrs = dict() if place is not None: attrs['place'] = str(place).upper() @@ -566,20 +641,41 @@ def multi_pass(reader, pass_num): 'create_multi_pass_reader', reader, {'pass_num': int(pass_num)}) -def parallel(reader): - return __create_shared_decorated_reader__('create_threaded_reader', reader, - {}) +def read_file(reader): + """ + Execute the given reader and get data via it. + + A reader is also a Variable. It can be a raw reader generated by + `fluid.layers.open_files()` or a decorated one generated by + `fluid.layers.double_buffer()` and so on. + + Args: + + reader(Variable): The reader to execute. + + Returns: + Tuple[Variable]: Data read via the given reader. + Examples: + .. code-block:: python -def read_file(file_obj): + data_file = fluid.layers.open_files( + filenames=['mnist.recordio'], + shapes=[(-1, 748), (-1, 1)], + lod_levels=[0, 0], + dtypes=["float32", "int64"]) + data_file = fluid.layers.double_buffer( + fluid.layers.batch(data_file, batch_size=64)) + input, label = fluid.layers.read_file(data_file) + """ helper = LayerHelper('read_file') out = [ helper.create_tmp_variable( stop_gradient=True, dtype='float32') - for _ in range(len(file_obj.desc.shapes())) + for _ in range(len(reader.desc.shapes())) ] helper.append_op( - type='read', inputs={'Reader': [file_obj]}, outputs={'Out': out}) + type='read', inputs={'Reader': [reader]}, outputs={'Out': out}) if len(out) == 1: return out[0] else: diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index cb60a3aec9..3096389101 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -44,6 +44,18 @@ def _type_to_str_(tp): return framework_pb2.AttrType.Name(tp) +_two_dollar_pattern_ = re.compile(r"\$\$([^\$]+)\$\$") +_single_dollar_pattern_ = re.compile(r"\$([^\$]+)\$") +_two_bang_pattern_ = re.compile(r"!!([^!]+)!!") + + +def escape_math(text): + return _two_bang_pattern_.sub( + r'$$\1$$', + _single_dollar_pattern_.sub(r':math:`\1`', + _two_dollar_pattern_.sub(r"!!\1!!", text))) + + def _generate_doc_string_(op_proto): """ Generate docstring by OpProto @@ -59,18 +71,16 @@ def _generate_doc_string_(op_proto): raise TypeError("OpProto should be `framework_pb2.OpProto`") buf = cStringIO.StringIO() - buf.write(op_proto.comment) + buf.write(escape_math(op_proto.comment)) buf.write('\nArgs:\n') for each_input in op_proto.inputs: line_begin = ' {0}: '.format(_convert_(each_input.name)) buf.write(line_begin) - buf.write(each_input.comment) - buf.write('\n') - buf.write(' ' * len(line_begin)) - buf.write('Duplicable: ') - buf.write(str(each_input.duplicable)) - buf.write(' Optional: ') - buf.write(str(each_input.dispensable)) + buf.write(escape_math(each_input.comment)) + if each_input.duplicable: + buf.write(" Duplicatable.") + if each_input.dispensable: + buf.write(" Optional.") buf.write('\n') skip_attrs = OpProtoHolder.generated_op_attr_names() @@ -83,7 +93,7 @@ def _generate_doc_string_(op_proto): buf.write(' (') buf.write(_type_to_str_(each_attr.type)) buf.write('): ') - buf.write(each_attr.comment) + buf.write(escape_math(each_attr.comment)) buf.write('\n') if len(op_proto.outputs) != 0: @@ -92,7 +102,7 @@ def _generate_doc_string_(op_proto): for each_opt in op_proto.outputs: if not each_opt.intermediate: break - buf.write(each_opt.comment) + buf.write(escape_math(each_opt.comment)) return buf.getvalue() @@ -224,9 +234,6 @@ def autodoc(comment=""): return __impl__ -_inline_math_single_dollar = re.compile(r"\$([^\$]+)\$") - - def templatedoc(op_type=None): """ Decorator of layer function. It will use the docstring from the layer @@ -244,9 +251,6 @@ def templatedoc(op_type=None): def trim_ending_dot(msg): return msg.rstrip('.') - def escape_inline_math(msg): - return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg) - def __impl__(func): if op_type is None: op_type_name = func.__name__ @@ -260,7 +264,7 @@ def templatedoc(op_type=None): for line in comment_lines: line = line.strip() if len(line) != 0: - comment += escape_inline_math(line) + comment += escape_math(line) comment += " " elif len(comment) != 0: comment += "\n \n " diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 716cc7824e..6071e3e742 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -25,10 +25,11 @@ import nn import ops import tensor from ..initializer import init_on_cpu +from ..framework import default_main_program, Parameter __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', - 'polynomial_decay', 'piecewise_decay', 'noam_decay' + 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS' ] @@ -70,21 +71,40 @@ def noam_decay(d_model, warmup_steps): def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies exponential decay to the learning rate. + """ + Applies exponential decay to the learning rate. + + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, the learning rate will be decayed by + 'decay_rate' every 'decay_steps' steps. + + >>> if staircase == True: + >>> decayed_learning_rate = learning_rate * decay_rate ^ floor(global_step / decay_steps) + >>> else: + >>> decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps) - ```python - decayed_learning_rate = learning_rate * - decay_rate ^ (global_step / decay_steps) - ``` Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: - The decayed learning rate + Variable: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.exponential_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) + """ global_step = _decay_step_counter() @@ -128,22 +148,39 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False): def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False): - """Applies inverse time decay to the initial learning rate. + """ + Applies inverse time decay to the initial learning rate. - >>> if staircase: + When training a model, it is often recommended to lower the learning rate as the + training progresses. By using this function, an inverse decay function will be + applied to the initial learning rate. + + >>> if staircase == True: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step)) >>> else: >>> decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step) Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training. - decay_steps: A Python `int32` number. - decay_rate: A Python `float` number. - staircase: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float): The initial learning rate. + decay_steps(int): See the decay computation above. + decay_rate(float): The decay rate. See the decay computation above. + staircase(Boolean): If True, decay the learning rate at discrete intervals. + Default: False Returns: - The decayed learning rate + Variable: The decayed learning rate + + Examples: + .. code-block:: python + + base_lr = 0.1 + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.inverse_time_decay( + learning_rate=base_lr, + decay_steps=10000, + decay_rate=0.5, + staircase=True)) + sgd_optimizer.minimize(avg_cost) """ global_step = _decay_step_counter() @@ -162,25 +199,28 @@ def polynomial_decay(learning_rate, end_learning_rate=0.0001, power=1.0, cycle=False): - """Applies polynomial decay to the initial learning rate. + """ + Applies polynomial decay to the initial learning rate. + + .. code-block:: python + + if cycle: + decay_steps = decay_steps * ceil(global_step / decay_steps) + else: + global_step = min(global_step, decay_steps) + decayed_learning_rate = (learning_rate - end_learning_rate) * + (1 - global_step / decay_steps) ^ power + end_learning_rate - >>> if cycle: - >>> decay_steps = decay_steps * ceil(global_step / decay_steps) - >>> else: - >>> global_step = min(global_step, decay_steps) - >>> decayed_learning_rate = (learning_rate - end_learning_rate) * - >>> (1 - global_step / decay_steps) ^ power + - >>> end_learning_rate Args: - learning_rate: A scalar float32 value or a Variable. This - will be the initial learning rate during training - decay_steps: A Python `int32` number. - end_learning_rate: A Python `float` number. - power: A Python `float` number - cycle: Boolean. If set true, decay the learning rate every decay_steps. + learning_rate(Variable|float32): A scalar float32 value or a Variable. This + will be the initial learning rate during training. + decay_steps(int32): A Python `int32` number. + end_learning_rate(float): A Python `float` number. + power(float): A Python `float` number. + cycle(bool): If set true, decay the learning rate every decay_steps. Returns: - The decayed learning rate + Variable: The decayed learning rate """ global_step = _decay_step_counter() @@ -209,15 +249,27 @@ def polynomial_decay(learning_rate, def piecewise_decay(boundaries, values): """Applies piecewise decay to the initial learning rate. - >>> boundaries = [10000, 20000] - >>> values = [1.0, 0.5, 0.1] - >>> - >>> if step < 10000: - >>> learning_rate = 1.0 - >>> elif 10000 <= step < 20000: - >>> learning_rate = 0.5 - >>> else: - >>> learning_rate = 0.1 + The algorithm can be described as the code below. + + .. code-block:: python + + boundaries = [10000, 20000] + values = [1.0, 0.5, 0.1] + if step < 10000: + learning_rate = 1.0 + elif 10000 <= step < 20000: + learning_rate = 0.5 + else: + learning_rate = 0.1 + Args: + boundaries: A list of steps numbers. + values: A list of learning rate values that will be picked during + different step boundaries. + + Returns: + The decayed learning rate. + + """ if len(values) - len(boundaries) != 1: @@ -249,3 +301,41 @@ def piecewise_decay(boundaries, values): tensor.assign(last_value_var, lr) return lr + + +def append_LARS(params_grads, learning_rate, weight_decay): + """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for + each layer. + + ```python + learning_rate *= local_gw_ratio * sqrt(sumsq(param)) + / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) + ``` + + Args: + learning_rate: A learning rate Variable. This + is the global learning rate for LARS. + weight_decay: A Python `float` number. + + Returns: + The decayed learning rate + """ + + def _balanced_weight(param_norm, grad_norm): + if weight_decay == 1.0: + return grad_norm + param_norm + else: + return grad_norm + weight_decay * param_norm + + for param, grad in params_grads: + param_lr = param.optimize_attr['learning_rate'] + param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param))) + grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad))) + if type(param_lr) == float and param_lr == 1.0: + decayed_lr = learning_rate * param_norm \ + / _balanced_weight(param_norm, grad_norm) + else: + decayed_lr = learning_rate * param_lr * param_norm \ + / _balanced_weight(param_norm, grad_norm) + # set back param local learning rate + param.optimize_attr['learning_rate'] = decayed_lr diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric_op.py similarity index 50% rename from python/paddle/fluid/layers/metric.py rename to python/paddle/fluid/layers/metric_op.py index a1c64ce277..99e82fdd04 100644 --- a/python/paddle/fluid/layers/metric.py +++ b/python/paddle/fluid/layers/metric_op.py @@ -27,8 +27,32 @@ __all__ = ['accuracy', 'auc'] def accuracy(input, label, k=1, correct=None, total=None): """ + accuracy layer. + Refer to the https://en.wikipedia.org/wiki/Precision_and_recall + This function computes the accuracy using the input and label. - The output is the top k inputs and their indices. + If the correct label occurs in top k predictions, then correct will increment by one. + Note: the dtype of accuracy is determined by input. the input and label dtype can be different. + + Args: + input(Variable): The input of accuracy layer, which is the predictions of network. + Carry LoD information is supported. + label(Variable): The label of dataset. + k(int): The top k predictions for each class will be checked. + correct(Variable): The correct predictions count. + total(Variable): The total entries count. + + Returns: + Variable: The correct rate. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name="data", shape=[-1, 32, 32], dtype="float32") + label = fluid.layers.data(name="data", shape=[-1,1], dtype="int32") + predict = fluid.layers.fc(input=data, size=10) + acc = fluid.layers.accuracy(input=predict, label=label, k=5) + """ helper = LayerHelper("accuracy", **locals()) topk_out, topk_indices = nn.topk(input, k=k) @@ -53,6 +77,43 @@ def accuracy(input, label, k=1, correct=None, total=None): def auc(input, label, curve='ROC', num_thresholds=200): + """ + **Area Under the Curve (AUC) Layer** + + This implementation computes the AUC according to forward output and label. + It is used very widely in binary classification evaluation. + + Note: If input label contains values other than 0 and 1, it will be cast + to `bool`. Find the relevant definitions `here `_. + + There are two types of possible curves: + + 1. ROC: Receiver operating characteristic; + 2. PR: Precision Recall + + Args: + input(Variable): A floating-point 2D Variable, values are in the range + [0, 1]. Each row is sorted in descending order. This + input should be the output of topk. Typically, this + Variable indicates the probability of each label. + label(Variable): A 2D int Variable indicating the label of the training + data. The height is batch size and width is always 1. + curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'. + num_thresholds(int): The number of thresholds to use when discretizing + the roc curve. Default 200. + + Returns: + Variable: A scalar representing the current AUC. + + Examples: + .. code-block:: python + + # network is a binary classification model and label the ground truth + prediction = network(image, is_infer=True) + auc_out=fluid.layers.auc(input=prediction, label=label) + """ + warnings.warn( "This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \ but can not aggregate them and get the pass AUC, because pass \ @@ -65,7 +126,7 @@ def auc(input, label, curve='ROC', num_thresholds=200): topk_out, topk_indices = nn.topk(input, k=k) auc_out = helper.create_tmp_variable(dtype="float32") helper.append_op( - type="accuracy", + type="auc", inputs={ "Out": [topk_out], "Indices": [topk_indices], diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index ac3ba4174f..925700d736 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -All layers just related to the neural network. +All layers just related to the neural network. """ from ..layer_helper import LayerHelper @@ -23,6 +23,7 @@ from layer_function_generator import autodoc, templatedoc from tensor import concat import utils import random +from .. import unique_name __all__ = [ 'fc', @@ -92,6 +93,9 @@ __all__ = [ 'gather', 'random_crop', 'mean_iou', + 'relu', + 'log', + 'crop', ] @@ -107,14 +111,15 @@ def fc(input, """ **Fully Connected Layer** - The fully connected layer can take multiple tensors as its inputs. It - creates a variable called weights for each input tensor, which represents - a fully connected weight matrix from each input unit to each output unit. - The fully connected layer multiplies each input tensor with its coresponding - weight to produce an output Tensor. If multiple input tensors are given, - the results of multiple multiplications will be sumed up. If bias_attr is - not None, a bias variable will be created and added to the output. Finally, - if activation is not None, it will be applied to the output as well. + This function creates a fully connected layer in the network. It can take + multiple tensors as its inputs. It creates a variable called weights for + each input tensor, which represents a fully connected weight matrix from + each input unit to each output unit. The fully connected layer multiplies + each input tensor with its coresponding weight to produce an output Tensor. + If multiple input tensors are given, the results of multiple multiplications + will be sumed up. If bias_attr is not None, a bias variable will be created + and added to the output. Finally, if activation is not None, it will be applied + to the output as well. This process can be formulated as follows: @@ -155,7 +160,7 @@ def fc(input, name (str, default None): The name of this layer. Returns: - A tensor variable storing the transformation result. + Variable: The transformation result. Raises: ValueError: If rank of the input tensor is less than 2. @@ -163,8 +168,7 @@ def fc(input, Examples: .. code-block:: python - data = fluid.layers.data( - name="data", shape=[32, 32], dtype="float32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") fc = fluid.layers.fc(input=data, size=1000, act="tanh") """ @@ -196,7 +200,10 @@ def fc(input, else: pre_bias = helper.create_tmp_variable(dtype) helper.append_op( - type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) + type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": use_mkldnn}) # add bias pre_activation = helper.append_bias_op(pre_bias, dim_start=num_flatten_dims) # add activation @@ -226,11 +233,11 @@ def embedding(input, have two elements which indicate the size of the dictionary of embeddings and the size of each embedding vector respectively. is_sparse(bool): The flag indicating whether to use sparse update. - is_distributed (bool): Whether to run lookup table from remote parameter server. + is_distributed(bool): Whether to run lookup table from remote parameter server. padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup. Otherwise the given :attr:`padding_idx` indicates padding the output with zeros whenever lookup encounters it in :attr:`input`. If - :math:`padding_idx < 0`, the padding_idx to use in lookup is + :math:`padding_idx < 0`, the :attr:`padding_idx` to use in lookup is :math:`size[0] + dim`. param_attr(ParamAttr): Parameters for this layer dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32, float_16, int etc @@ -266,6 +273,7 @@ def embedding(input, return tmp +@templatedoc(op_type="lstm") def dynamic_lstm(input, size, h_0=None, @@ -280,56 +288,11 @@ def dynamic_lstm(input, dtype='float32', name=None): """ - **Dynamic LSTM Layer** - - The defalut implementation is diagonal/peephole connection - (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows: - - .. math:: - - i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) - - f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) - - \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) - - o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) - - c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} - - h_t & = o_t \odot act_h(c_t) - - where the :math:`W` terms denote weight matrices (e.g. :math:`W_{xi}` is - the matrix of weights from the input gate to the input), :math:`W_{ic}, \ - W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In - our implementation, we use vectors to reprenset these diagonal weight - matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input - gate bias vector), :math:`\sigma` is the non-linear activations, such as - logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input - gate, forget gate, output gate, and cell activation vectors, respectively, - all of which have the same size as the cell output activation vector :math:`h`. - - The :math:`\odot` is the element-wise product of the vectors. :math:`act_g` - and :math:`act_h` are the cell input and cell output activation functions - and `tanh` is usually used for them. :math:`\\tilde{c_t}` is also called - candidate hidden state, which is computed based on the current input and - the previous hidden state. - - Set `use_peepholes` to `False` to disable peephole connection. The formula - is omitted here, please refer to the paper - http://www.bioinf.jku.at/publications/older/2604.pdf for details. - - Note that these :math:`W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}` - operations on the input :math:`x_{t}` are NOT included in this operator. - Users can choose to use fully-connect layer before LSTM layer. + ${comment} Args: - input(Variable): The input of dynamic_lstm layer, which supports - variable-time length input sequence. The underlying - tensor in this Variable is a matrix with shape - (T X 4D), where T is the total time steps in this - mini-batch, D is the hidden size. - size(int): 4 * hidden size. + input (Variable): ${input_comment} + size (int): 4 * hidden size. h_0(Variable): The initial hidden state is an optional input, default is zero. This is a tensor with shape (N x D), where N is the batch size and D is the hidden size. @@ -344,33 +307,26 @@ def dynamic_lstm(input, W_{fh}, W_{oh}`} - The shape is (D x 4D), where D is the hidden size. - bias_attr(ParamAttr|None): The bias attribute for the learnable bias + bias_attr (ParamAttr|None): The bias attribute for the learnable bias weights, which contains two parts, input-hidden bias weights and peephole connections weights if setting `use_peepholes` to `True`. 1. `use_peepholes = False` - - Biases = {:math:`b_c, b_i, b_f, b_o`}. - - The shape is (1 x 4D). + - Biases = {:math:`b_c, b_i, b_f, b_o`}. + - The shape is (1 x 4D). 2. `use_peepholes = True` - - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ + - Biases = { :math:`b_c, b_i, b_f, b_o, W_{ic}, \ W_{fc}, W_{oc}`}. - - The shape is (1 x 7D). - use_peepholes(bool): Whether to enable diagonal/peephole connections, - default `True`. - is_reverse(bool): Whether to compute reversed LSTM, default `False`. - gate_activation(str): The activation for input gate, forget gate and - output gate. Choices = ["sigmoid", "tanh", "relu", - "identity"], default "sigmoid". - cell_activation(str): The activation for cell output. Choices = ["sigmoid", - "tanh", "relu", "identity"], default "tanh". - candidate_activation(str): The activation for candidate hidden state. - Choices = ["sigmoid", "tanh", - "relu", "identity"], - default "tanh". - dtype(str): Data type. Choices = ["float32", "float64"], default "float32". - name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + - The shape is (1 x 7D). + use_peepholes (bool): ${use_peepholes_comment} + is_reverse (bool): ${is_reverse_comment} + gate_activation (str): ${gate_activation_comment} + cell_activation (str): ${cell_activation_comment} + candidate_activation (str): ${candidate_activation_comment} + dtype (str): Data type. Choices = ["float32", "float64"], default "float32". + name (str|None): A name for this layer(optional). If set None, the layer + will be named automatically. Returns: tuple: The hidden state, and cell state of LSTM. The shape of both \ @@ -541,27 +497,31 @@ def dynamic_lstmp(input, cell_activation(str): The activation for cell output. Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". candidate_activation(str): The activation for candidate hidden state. - Choices = ["sigmoid", "tanh", - "relu", "identity"], + Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". proj_activation(str): The activation for projection output. - Choices = ["sigmoid", "tanh", - "relu", "identity"], + Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". dtype(str): Data type. Choices = ["float32", "float64"], default "float32". name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. Returns: - tuple: The projection of hidden state, and cell state of LSTMP. The \ - shape of projection is (T x P), for the cell state which is \ - (T x D), and both LoD is the same with the `input`. + tuple: A tuple of two output variable: the projection of hidden state, \ + and cell state of LSTMP. The shape of projection is (T x P), \ + for the cell state which is (T x D), and both LoD is the same \ + with the `input`. Examples: + .. code-block:: python + dict_dim, emb_dim = 128, 64 + data = fluid.layers.data(name='sequence', shape=[1], + dtype='int32', lod_level=1) + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) hidden_dim, proj_dim = 512, 256 - fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4, + fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4, act=None, bias_attr=None) proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out, size=hidden_dim * 4, @@ -627,10 +587,10 @@ def dynamic_gru(input, candidate_activation='tanh', h_0=None): """ - **Dynamic GRU Layer** + **Gated Recurrent Unit (GRU) Layer** Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on - Sequence Modeling `_ + Sequence Modeling `_ . The formula is as follows: @@ -677,17 +637,25 @@ def dynamic_gru(input, Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". candidate_activation(str): The activation for candidate hidden state. Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". - h_0 (Variable): The hidden output of the first time step. + h_0 (Variable): This is initial hidden state. If not set, default is + zero. This is a tensor with shape (N x D), where N is the number of + total time steps of input mini-batch feature and D is the hidden + size. Returns: Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \ - and lod is the same with the input. + and sequence length is the same with the input. Examples: + .. code-block:: python + dict_dim, emb_dim = 128, 64 + data = fluid.layers.data(name='sequence', shape=[1], + dtype='int32', lod_level=1) + emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) hidden_dim = 512 - x = fluid.layers.fc(input=data, size=hidden_dim * 3) + x = fluid.layers.fc(input=emb, size=hidden_dim * 3) hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim) """ @@ -835,11 +803,14 @@ def linear_chain_crf(input, label, param_attr=None): Args: input(${emission_type}): ${emission_comment} + input(${transition_type}): ${transition_comment} label(${label_type}): ${label_comment} param_attr(ParamAttr): The attribute of the learnable parameter. Returns: - ${log_likelihood_comment} + output(${emission_exps_type}): ${emission_exps_comment} \n + output(${transition_exps_type}): ${transition_exps_comment} \n + output(${log_likelihood_type}): ${log_likelihood_comment} """ helper = LayerHelper('linear_chain_crf', **locals()) @@ -874,11 +845,19 @@ def crf_decoding(input, param_attr, label=None): Args: input(${emission_type}): ${emission_comment} + param_attr(ParamAttr): The parameter attribute for training. + label(${label_type}): ${label_comment} Returns: - ${viterbi_path_comment} + Variable: ${viterbi_path_comment} + + Examples: + .. code-block:: python + + crf_decode = layers.crf_decoding( + input=hidden, param_attr=ParamAttr(name="crfw")) """ helper = LayerHelper('crf_decoding', **locals()) transition = helper.get_parameter(param_attr.name) @@ -893,15 +872,15 @@ def crf_decoding(input, param_attr, label=None): return viterbi_path +@templatedoc() def cos_sim(X, Y): """ - This function performs the cosine similarity between two tensors - X and Y and returns that as the output. + ${comment} Args: - X (Variable): The input X. - Y (Variable): The input Y. - + X (Variable): ${x_comment}. + Y (Variable): ${y_comment}. + Returns: Variable: the output of cosine(X, Y). """ @@ -925,13 +904,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): Drop or keep each element of `x` independently. Dropout is a regularization technique for reducing overfitting by preventing neuron co-adaption during - training. The dropout operator randomly set (according to the given dropout + training. The dropout operator randomly sets (according to the given dropout probability) the outputs of some units to zero, while others are remain unchanged. Args: - x (Variable): The input tensor. - dropout_prob (float): Probability of setting units to zero. + x (Variable): The input tensor variable. + dropout_prob (float): Probability of setting units to zero. is_test (bool): A flag indicating whether it is in test phrase or not. seed (int): A Python integer used to create random seeds. If this parameter is set to None, a random seed is used. @@ -941,13 +920,14 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): will be named automatically. Returns: - Variable: A tensor variable. + Variable: A tensor variable is the shape with `x`. Examples: + .. code-block:: python - x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") - droped = fluid.layers.dropout(input=x, dropout_rate=0.5) + x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") + droped = fluid.layers.dropout(x, dropout_prob=0.5) """ helper = LayerHelper('dropout', **locals()) @@ -1097,20 +1077,94 @@ def chunk_eval(input, num_chunk_types, excluded_chunk_types=None): """ + **Chunk Evaluator** + This function computes and outputs the precision, recall and F1-score of chunk detection. + For some basics of chunking, please refer to + 'Chunking with Support Vector Machines '. + + ChunkEvalOp computes the precision, recall, and F1-score of chunk detection, + and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. + Here is a NER example of labeling for these tagging schemes: + + .. code-block:: python + + ====== ====== ====== ===== == ============ ===== ===== ===== == ========= + Li Ming works at Agricultural Bank of China in Beijing. + ====== ====== ====== ===== == ============ ===== ===== ===== == ========= + IO I-PER I-PER O O I-ORG I-ORG I-ORG I-ORG O I-LOC + IOB B-PER I-PER O O B-ORG I-ORG I-ORG I-ORG O B-LOC + IOE I-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O E-LOC + IOBES B-PER E-PER O O I-ORG I-ORG I-ORG E-ORG O S-LOC + ====== ====== ====== ===== == ============ ===== ===== ===== == ========= + + There are three chunk types(named entity types) including PER(person), ORG(organization) + and LOC(LOCATION), and we can see that the labels have the form -. + + Since the calculations actually use label ids rather than labels, extra attention + should be paid when mapping labels to ids to make CheckEvalOp work. The key point + is that the listed equations are satisfied by ids. + + .. code-block:: python + + tag_type = label % num_tag_type + chunk_type = label / num_tag_type + + where `num_tag_type` is the num of tag types in the tagging scheme, `num_chunk_type` + is the num of chunk types, and `tag_type` get its value from the following table. + + .. code-block:: python + + Scheme Begin Inside End Single + plain 0 - - - + IOB 0 1 - - + IOE - 0 1 - + IOBES 0 1 2 3 + + Still use NER as example, assuming the tagging scheme is IOB while chunk types are ORG, + PER and LOC. To satisfy the above equations, the label map can be like this: + + .. code-block:: python + + B-ORG 0 + I-ORG 1 + B-PER 2 + I-PER 3 + B-LOC 4 + I-LOC 5 + O 6 + + It's not hard to verify the equations noting that the num of chunk types + is 3 and the num of tag types in IOB scheme is 2. For example, the label + id of I-LOC is 5, the tag type id of I-LOC is 1, and the chunk type id of + I-LOC is 2, which consistent with the results from the equations. + Args: input (Variable): prediction output of the network. label (Variable): label of the test data set. chunk_scheme (str): ${chunk_scheme_comment} num_chunk_types (int): ${num_chunk_types_comment} excluded_chunk_types (list): ${excluded_chunk_types_comment} - + Returns: - tuple: tuple containing: (precision, recall, f1_score, - num_infer_chunks, num_label_chunks, - num_correct_chunks) + tuple: tuple containing: precision, recall, f1_score, + num_infer_chunks, num_label_chunks, + num_correct_chunks + + Examples: + .. code-block:: python + + crf = fluid.layers.linear_chain_crf( + input=hidden, label=label, param_attr=ParamAttr(name="crfw")) + crf_decode = fluid.layers.crf_decoding( + input=hidden, param_attr=ParamAttr(name="crfw")) + fluid.layers.chunk_eval( + input=crf_decode, + label=label, + chunk_scheme="IOB", + num_chunk_types=(label_dict_len - 1) / 2) """ helper = LayerHelper("chunk_eval", **locals()) @@ -1166,15 +1220,11 @@ def sequence_conv(input, bias_attr (ParamAttr|None): attributes for bias param_attr (ParamAttr|None): attributes for parameter act (str): the activation type - + Returns: Variable: output of sequence_conv """ - # FIXME(dzh) : want to unify the argument of python layer - # function. So we ignore some unecessary attributes. - # such as, padding_trainable, context_start. - helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() filter_shape = [filter_size * input.shape[1], num_filters] @@ -1199,6 +1249,41 @@ def sequence_conv(input, def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): + """ + This function computes the softmax activation among all time-steps for each + sequence. The dimension of each time-step should be 1. Thus, the shape of + input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N` + is the sum of the length of all sequences. + + For i-th sequence in a mini-batch: + + .. math:: + + Out(X[lod[i]:lod[i+1]], :) = \\frac{\exp(X[lod[i]:lod[i+1], :])}{\sum(\exp(X[lod[i]:lod[i+1], :]))} + + For example, for a mini-batch of 3 sequences with variable-length, + each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7], + then softmax will be computed among :math:`X[0:2, :]`, :math:`X[2:5, :]`, + :math:`X[5:7, :]`, and :math:`N` turns out to be 7. + + Args: + input (Variable): The input variable which is a LoDTensor. + bias_attr (ParamAttr|None): attributes for bias + param_attr (ParamAttr|None): attributes for parameter + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ + library is installed. Default: True + + Returns: + Variable: output of sequence_softmax + + Examples: + + .. code-block:: python + + x = fluid.layers.data(name='x', shape=[7, 1], + dtype='float32', lod_level=1) + x_sequence_softmax = fluid.layers.sequence_softmax(input=x) + """ helper = LayerHelper('sequence_softmax', **locals()) dtype = helper.input_dtype() softmax_out = helper.create_tmp_variable(dtype) @@ -1211,6 +1296,45 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): def softmax(input, param_attr=None, bias_attr=None, use_cudnn=True, name=None): + """ + The input of the softmax layer is a 2-D tensor with shape N x K (N is the + batch_size, K is the dimension of input feature). The output tensor has the + same shape as the input tensor. + + For each row of the input tensor, the softmax operator squashes the + K-dimensional vector of arbitrary real values to a K-dimensional vector of real + values in the range [0, 1] that add up to 1. + + It computes the exponential of the given dimension and the sum of exponential + values of all the other dimensions in the K-dimensional vector input. + Then the ratio of the exponential of the given dimension and the sum of + exponential values of all the other dimensions is the output of the softmax + operator. + + For each row :math:`i` and each column :math:`j` in Input(X), we have: + + .. math:: + + Out[i, j] = \\frac{\exp(X[i, j])}{\sum_j(exp(X[i, j])} + + Args: + input (Variable): The input variable. + bias_attr (ParamAttr): attributes for bias + param_attr (ParamAttr): attributes for parameter + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ + library is installed. + + Returns: + Variable: output of softmax + + Examples: + + .. code-block:: python + + fc = fluid.layers.fc(input=x, size=10) + softmax = fluid.layers.softmax(input=fc) + + """ helper = LayerHelper('softmax', **locals()) dtype = helper.input_dtype() softmax_out = helper.create_tmp_variable(dtype) @@ -1236,14 +1360,17 @@ def conv2d(input, act=None, name=None): """ - **Convlution2D Layer** - The convolution2D layer calculates the output based on the input, filter - and strides, paddings, dilations, groups parameters. Input(Input) and - Output(Output) are in NCHW format. Where N is batch size, C is the number of + and strides, paddings, dilations, groups parameters. Input and + Output are in NCHW format, where N is batch size, C is the number of channels, H is the height of the feature, and W is the width of the feature. - The details of convolution layer, please refer UFLDL's `convolution, - `_ . + Filter is in MCHW format, where M is the number of output image channels, + C is the number of input image channels, H is the height of the filter, + and W is the width of the filter. If the groups is greater than 1, + C will equal the number of input image channels divided by the groups. + Please refer to UFLDL's `convolution + `_ + for more detials. If bias attribution and activation type are provided, bias is added to the output of the convolution, and the corresponding activation function is applied to the final result. @@ -1254,15 +1381,14 @@ def conv2d(input, Out = \sigma (W \\ast X + b) - In the above equation: + Where: * :math:`X`: Input value, a tensor with NCHW format. * :math:`W`: Filter value, a tensor with MCHW format. * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be - different. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. Example: @@ -1273,6 +1399,7 @@ def conv2d(input, Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)` - Output: + Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` Where @@ -1284,7 +1411,7 @@ def conv2d(input, Args: input (Variable): The input image with [N, C, H, W] format. - num_filters(int): The number of filter. It is as same as the output + num_filters(int): The number of filter. It is as same as the output image channel. filter_size (int|tuple|None): The filter size. If filter_size is a tuple, it must contain two integers, (filter_size_H, filter_size_W). @@ -1307,7 +1434,8 @@ def conv2d(input, bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn library is installed. Default: True - use_mkldnn (bool): Use mkldnn kernels or not. + use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled + with mkldnn library. Default: False act (str): Activation type. Default: None name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. @@ -1323,10 +1451,8 @@ def conv2d(input, Examples: .. code-block:: python - data = fluid.layers.data( - name='data', shape=[3, 32, 32], dtype='float32') - conv2d = fluid.layers.conv2d( - input=data, num_filters=2, filter_size=3, act="relu") + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu") """ num_channels = input.shape[1] @@ -1428,8 +1554,7 @@ def conv3d(input, * :math:`\\ast`: Convolution operation. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be - different. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. Example: @@ -1491,10 +1616,8 @@ def conv3d(input, Examples: .. code-block:: python - data = fluid.layers.data( - name='data', shape=[3, 12, 32, 32], dtype='float32') - conv2d = fluid.layers.conv3d( - input=data, num_filters=2, filter_size=3, act="relu") + data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32') + conv3d = fluid.layers.conv3d(input=data, num_filters=2, filter_size=3, act="relu") """ l_type = 'conv3d' @@ -1571,13 +1694,13 @@ def sequence_pool(input, pool_type): .. code-block:: text x is a 1-level LoDTensor: - x.lod = [[0, 2, 5, 7]] + x.lod = [[2, 3, 2]] x.data = [1, 3, 2, 4, 6, 5, 1] x.dims = [7, 1] then output is a Tensor: out.dim = [3, 1] - with condition len(x.lod[-1]) - 1 == out.dims[0] + with condition len(x.lod[-1]) == out.dims[0] for different pool_type: average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 @@ -1636,13 +1759,13 @@ def sequence_first_step(input): .. code-block:: text x is a 1-level LoDTensor: - x.lod = [[0, 2, 5, 7]] + x.lod = [[2, 3, 2]] x.data = [1, 3, 2, 4, 6, 5, 1] x.dims = [7, 1] then output is a Tensor: out.dim = [3, 1] - with condition len(x.lod[-1]) - 1 == out.dims[0] + with condition len(x.lod[-1]) == out.dims[0] out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1) Args: @@ -1669,13 +1792,13 @@ def sequence_last_step(input): .. code-block:: text x is a 1-level LoDTensor: - x.lod = [[0, 2, 5, 7]] + x.lod = [[2, 3, 2]] x.data = [1, 3, 2, 4, 6, 5, 1] x.dims = [7, 1] then output is a Tensor: out.dim = [3, 1] - with condition len(x.lod[-1]) - 1 == out.dims[0] + with condition len(x.lod[-1]) == out.dims[0] out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1) Args: @@ -1695,6 +1818,7 @@ def sequence_last_step(input): return sequence_pool(input=input, pool_type="last") +@templatedoc() def pool2d(input, pool_size=-1, pool_type="max", @@ -1706,24 +1830,45 @@ def pool2d(input, use_mkldnn=False, name=None): """ - This function adds the operator for pooling in 2 dimensions, using the - pooling configurations mentioned in input parameters. + ${comment} Args: - input (Variable): ${input_comment} - pool_size (int): ${ksize_comment} - pool_type (str): ${pooling_type_comment} + input (Variable): The input tensor of pooling operator. The format of + input tensor is NCHW, where N is batch size, C is + the number of channels, H is the height of the + feature, and W is the width of the feature. + pool_size (int): The side length of pooling windows. All pooling + windows are squares with pool_size on a side. + pool_type: ${pooling_type_comment} pool_stride (int): stride of the pooling layer. pool_padding (int): padding size. - global_pooling (bool): ${global_pooling_comment} - use_cudnn (bool): ${use_cudnn_comment} - ceil_mode (bool): ${ceil_mode_comment} - use_mkldnn (bool): ${use_mkldnn_comment} - name (str): A name for this layer(optional). If set None, the layer - will be named automatically. - + global_pooling: ${global_pooling_comment} + use_cudnn: ${use_cudnn_comment} + ceil_mode: ${ceil_mode_comment} + use_mkldnn: ${use_mkldnn_comment} + name (str|None): A name for this layer(optional). If set None, the + layer will be named automatically. + Returns: - Variable: output of pool2d layer. + Variable: The pooling result. + + Raises: + ValueError: If 'pool_type' is not "max" nor "avg" + ValueError: If 'global_pooling' is False and 'pool_size' is -1 + ValueError: If 'use_cudnn' is not a bool value. + + Examples: + + .. code-block:: python + + data = fluid.layers.data( + name='data', shape=[3, 32, 32], dtype='float32') + conv2d = fluid.layers.pool2d( + input=data, + pool_size=2, + pool_type='max', + pool_stride=1, + global_pooling=False) """ if pool_type not in ["max", "avg"]: raise ValueError( @@ -1849,29 +1994,61 @@ def batch_norm(input, name=None, moving_mean_name=None, moving_variance_name=None, - do_model_average_for_mean_and_var=False): + do_model_average_for_mean_and_var=False, + fuse_with_relu=False): """ - This function helps create an operator to implement - the BatchNorm layer using the configurations from the input parameters. + **Batch Normalization Layer** + + Can be used as a normalizer function for conv2d and fully_connected operations. + The required data format for this layer is one of the following: + + 1. NHWC `[batch, in_height, in_width, in_channels]` + + 2. NCHW `[batch, in_channels, in_height, in_width]` + + Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift `_ + for more details. + + :math:`input` is the input features over a mini-batch. + + .. math:: + + \\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\ + \ mini-batch\ mean \\\\ + \\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\ + \\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\ + \\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\ + \\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\ + y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift Args: - input (Variable): the input variable. - act (str): activation type - is_test (bool): whether to run batch_norm as test mode. - momentum (float): momentum - epsilon (float): epsilon, default 1e-05 - param_attr (ParamAttr|None): attributes for parameter - bias_attr (ParamAttr|None): attributes for bias - data_layout (str): data layout, default NCHW - in_place (bool): if True, do not create tmp variable - use_mkldnn (bool): ${use_mkldnn_comment} - name (str): The name of this layer. It is optional. - moving_mean_name (str): The name of moving mean variable name, optional. - moving_variance_name (str): The name of moving variance name, optional. - do_model_average_for_mean_and_var (bool): + input(variable): The input variable which is a LoDTensor. + act(string, Default None): Activation type, linear|relu|prelu|... + is_test(bool, Default False): Used for training or training. + momentum(float, Default 0.9): + epsilon(float, Default 1e-05): + param_attr(ParamAttr): The parameter attribute for Parameter `scale`. + bias_attr(ParamAttr): The parameter attribute for Parameter `bias`. + data_layout(string, default NCHW): NCHW|NHWC + in_place(bool, Default False): Make the input and output of batch norm reuse memory. + use_mkldnn(bool, Default false): ${use_mkldnn_comment} + name(string, Default None): A name for this layer(optional). If set None, the layer + will be named automatically. + moving_mean_name(string, Default None): The name of moving_mean which store the global Mean. + moving_variance_name(string, Default None): The name of the moving_variance which store the global Variance. + do_model_average_for_mean_and_var(bool, Default False): Do model average for mean and variance or not. + fuse_with_relu (bool): if True, this OP performs relu after batch norm. Returns: - Variable: output of batch_norm layer. + Variable: A tensor variable which is the result after applying batch normalization on the input. + + Examples: + + .. code-block:: python + + hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w') + hidden2 = fluid.layers.batch_norm(input=hidden1) """ helper = LayerHelper('batch_norm', **locals()) dtype = helper.input_dtype() @@ -1947,12 +2124,14 @@ def batch_norm(input, "momentum": momentum, "epsilon": epsilon, "is_test": is_test, - "use_mkldnn": use_mkldnn + "use_mkldnn": use_mkldnn, + "fuse_with_relu": fuse_with_relu }) return helper.append_activation(batch_norm_out) +@templatedoc() def layer_norm(input, scale=True, shift=True, @@ -1963,20 +2142,11 @@ def layer_norm(input, act=None, name=None): """ - **Layer Normalization** - - Assume feature vectors exist on dimensions - :attr:`begin_norm_axis ... rank(input)` and calculate the moment statistics - along these dimensions for each feature vector :math:`a` with size - :math:`H`, then normalize each feature vector using the corresponding - statistics. After that, apply learnable gain and bias on the normalized - tensor to scale and shift if :attr:`scale` and :attr:`shift` are set. - - Refer to `Layer Normalization `_ + ${comment} The formula is as follows: - .. math:: + .. math:: \\mu & = \\frac{1}{H}\\sum_{i=1}^{H} a_i @@ -1984,6 +2154,15 @@ def layer_norm(input, h & = f(\\frac{g}{\\sigma}(a - \\mu) + b) + * :math:`a`: the vector representation of the summed inputs to the neurons + in that layer. + + * :math:`H`: the number of hidden units in a layers + + * :math:`g`: the trainable scale parameter. + + * :math:`b`: the trainable bias parameter. + Args: input(Variable): The input tensor variable. scale(bool): Whether to learn the adaptive gain :math:`g` after @@ -2002,14 +2181,13 @@ def layer_norm(input, name (str): The name of this layer. It is optional. Returns: - Variable: A tensor variable with the same shape as the input. + ${y_comment} Examples: - .. code-block:: python - data = fluid.layers.data( - name='data', shape=[3, 32, 32], dtype='float32') - x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) + >>> data = fluid.layers.data(name='data', shape=[3, 32, 32], + >>> dtype='float32') + >>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1) """ helper = LayerHelper('layer_norm', **locals()) dtype = helper.input_dtype() @@ -2050,34 +2228,6 @@ def layer_norm(input, return helper.append_activation(layer_norm_out) -def beam_search_decode(ids, scores, name=None): - """ - ${beam_search_decode} - - Args: - ids (Variable): ${ids_comment} - scores (Variable): ${scores_comment} - name (str): The name of this layer. It is optional. - - Returns: - tuple: a tuple of two output variable: sentence_ids, sentence_scores - """ - helper = LayerHelper('beam_search_decode', **locals()) - sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) - sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) - - helper.append_op( - type="beam_search_decode", - inputs={"Ids": ids, - "Scores": scores}, - outputs={ - "SentenceIds": sentence_ids, - "SentenceScores": sentence_scores - }) - - return sentence_ids, sentence_scores - - def conv2d_transpose(input, num_filters, output_size=None, @@ -2102,32 +2252,36 @@ def conv2d_transpose(input, represent height and width, respectively. The details of convolution transpose layer, please refer to the following explanation and references `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. For each input :math:`X`, the equation is: .. math:: - Out = W \\ast X + Out = \sigma (W \\ast X + b) - In the above equation: + Where: * :math:`X`: Input value, a tensor with NCHW format. * :math:`W`: Filter value, a tensor with MCHW format. - * :math:`\\ast` : Convolution transpose operation. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be - different. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. Example: - Input: - Input shape: $(N, C_{in}, H_{in}, W_{in})$ + Input shape: :math:`(N, C_{in}, H_{in}, W_{in})` - Filter shape: $(C_{in}, C_{out}, H_f, W_f)$ + Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)` - Output: - Output shape: $(N, C_{out}, H_{out}, W_{out})$ + Output shape: :math:`(N, C_{out}, H_{out}, W_{out})` Where @@ -2181,15 +2335,20 @@ def conv2d_transpose(input, Examples: .. code-block:: python - data = fluid.layers.data( - name='data', shape=[3, 32, 32], dtype='float32') - conv2d_transpose = fluid.layers.conv2d_transpose( - input=data, num_filters=2, filter_size=3) + data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32') + conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3) """ - helper = LayerHelper("conv2d_transpose", **locals()) + + input_channel = input.shape[1] + + op_type = 'conv2d_transpose' + if (input_channel == groups and num_filters == input_channel and + not use_cudnn): + op_type = 'depthwise_conv2d_transpose' + + helper = LayerHelper(op_type, **locals()) if not isinstance(input, Variable): raise TypeError("Input of conv2d_transpose must be Variable") - input_channel = input.shape[1] padding = utils.convert_to_list(padding, 2, 'padding') stride = utils.convert_to_list(stride, 2, 'stride') @@ -2223,7 +2382,7 @@ def conv2d_transpose(input, pre_bias = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( - type='conv2d_transpose', + type=op_type, inputs={'Input': [input], 'Filter': [img_filter]}, outputs={'Output': pre_bias}, @@ -2264,32 +2423,36 @@ def conv3d_transpose(input, two elements. These two elements represent height and width, respectively. The details of convolution transpose layer, please refer to the following explanation and references `therein `_. + If bias attribution and activation type are provided, bias is added to + the output of the convolution, and the corresponding activation function + is applied to the final result. For each input :math:`X`, the equation is: .. math:: - Out = W \\ast X + Out = \sigma (W \\ast X + b) In the above equation: * :math:`X`: Input value, a tensor with NCDHW format. * :math:`W`: Filter value, a tensor with MCDHW format. - * :math:`\\ast` : Convolution transpose operation. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be - different. + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. Example: - Input: - Input shape: $(N, C_{in}, D_{in}, H_{in}, W_{in})$ + Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - Filter shape: $(C_{in}, C_{out}, D_f, H_f, W_f)$ + Filter shape: :math:`(C_{in}, C_{out}, D_f, H_f, W_f)` - Output: - Output shape: $(N, C_{out}, D_{out}, H_{out}, W_{out})$ + Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` Where @@ -2344,10 +2507,8 @@ def conv3d_transpose(input, Examples: .. code-block:: python - data = fluid.layers.data( - name='data', shape=[3, 12, 32, 32], dtype='float32') - conv2d_transpose = fluid.layers.conv3d_transpose( - input=data, num_filters=2, filter_size=3) + data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32') + conv3d_transpose = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3) """ l_type = "conv3d_transpose" helper = LayerHelper(l_type, **locals()) @@ -2418,18 +2579,18 @@ def sequence_expand(x, y, ref_level=-1, name=None): * Case 1 x is a LoDTensor: - x.lod = [[0, 2, 4]] + x.lod = [[2, 2]] x.data = [[a], [b], [c], [d]] x.dims = [4, 1] y is a LoDTensor: - y.lod = [[0, 2, 4], - [0, 3, 6, 7, 8]] + y.lod = [[2, 2], + [3, 3, 1, 1]] ref_level: 0 then output is a 1-level LoDTensor: - out.lod = [[0, 2, 4, 6, 8]] + out.lod = [[2, 2, 2, 2]] out.data = [[a], [b], [a], [b], [c], [d], [c], [d]] out.dims = [8, 1] @@ -2439,7 +2600,7 @@ def sequence_expand(x, y, ref_level=-1, name=None): x.dims = [3, 1] y is a LoDTensor: - y.lod = [[0, 2, 2, 5]] + y.lod = [[2, 0, 3]] ref_level: -1 @@ -2477,21 +2638,89 @@ def sequence_expand(x, y, ref_level=-1, name=None): return tmp -def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): - ''' - This function implements the beam search algorithm. +def beam_search(pre_ids, + pre_scores, + ids, + scores, + beam_size, + end_id, + level=0, + name=None): + """ + Beam search is a classical algorithm for selecting candidate words in a + machine translation task. - Args: - pre_ids (Variable): ${pre_ids_comment} - ids (Variable): ${ids_comment} - scores (Variable): ${scores_comment} - beam_size (int): ${beam_size_comment} - end_id (int): ${end_id_comment} - level (int): ${level_comment} + Refer to `Beam search `_ + for more details. + This layer does the search in beams for one time step. Specifically, it + selects the top-K candidate word ids of current step from :attr:`ids` + according to their :attr:`scores` for all source sentences, where K is + :attr:`beam_size` and :attr:`ids, scores` are predicted results from the + computation cell. Additionally, :attr:`pre_ids` and :attr:`pre_scores` are + the output of beam_search at previous step, they are needed for special use + to handle ended candidate translations. + + Note that the :attr:`scores` passed in should be accumulated scores, and + length penalty should be done with extra operators before calculating the + accumulated scores if needed, also suggest finding top-K before it and + using the top-K candidates following. + + Please see the following demo for a fully beam search usage example: + + fluid/tests/book/test_machine_translation.py + + Args: + pre_ids(Variable): The LodTensor variable which is the output of + beam_search at previous step. It should be a LodTensor with shape + :math:`(batch_size, 1)` and lod + :math:`[[0, 1, ... , batch_size], [0, 1, ..., batch_size]]` at the + first step. + pre_scores(Variable): The LodTensor variable which is the output of + beam_search at previous step. + ids(Variable): The LodTensor variable containing the candidates ids. + Its shape should be :math:`(batch_size \\times beam_size, K)`, + where :math:`K` supposed to be :attr:`beam_size`. + scores(Variable): The LodTensor variable containing the accumulated + scores corresponding to :attr:`ids` and its shape is the same as + the shape of :attr:`ids`. + beam_size(int): The beam width used in beam search. + end_id(int): The id of end token. + level(int, default 0): It can be ignored and mustn't change currently. + It means the source level of lod, which is explained as following. + The lod level of :attr:`ids` should be 2. The first level is source + level which describes how many prefixes (branchs) for each source + sentece (beam), and the second level is sentence level which + describes how these candidates belong to the prefix. The paths + linking prefixes and selected candidates are organized and reserved + in lod. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + Returns: - tuple: a tuple of beam_search output variables: selected_ids, selected_scores - ''' + Variable: The LodTensor pair containing the selected ids and the \ + corresponding scores. + + Examples: + .. code-block:: python + + # Suppose `probs` contains predicted results from the computation + # cell and `pre_ids` and `pre_scores` is the output of beam_search + # at previous step. + topk_scores, topk_indices = layers.topk(probs, k=beam_size) + accu_scores = layers.elementwise_add( + x=layers.log(x=topk_scores)), + y=layers.reshape( + pre_scores, shape=[-1]), + axis=0) + selected_ids, selected_scores = layers.beam_search( + pre_ids=pre_ids, + pre_scores=pre_scores, + ids=topk_indices, + scores=accu_scores, + beam_size=beam_size, + end_id=end_id) + """ helper = LayerHelper('beam_search', **locals()) score_type = scores.dtype id_type = ids.dtype @@ -2503,6 +2732,7 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): type='beam_search', inputs={ 'pre_ids': pre_ids, + 'pre_scores': pre_scores, 'ids': ids, 'scores': scores, }, @@ -2520,6 +2750,56 @@ def beam_search(pre_ids, ids, scores, beam_size, end_id, level=0): return selected_ids, selected_scores +def beam_search_decode(ids, scores, beam_size, end_id, name=None): + """ + Beam Search Decode Layer. This layer constructs the full hypotheses for + each source sentence by walking back along the LoDTensorArray :attr:`ids` + whose lods can be used to restore the path in the beam search tree. + Please see the following demo for a fully beam search usage example: + fluid/tests/book/test_machine_translation.py + + Args: + ids(Variable): The LodTensorArray variable containing the selected ids + of all steps. + scores(Variable): The LodTensorArray variable containing the selected + scores of all steps. + beam_size(int): The beam width used in beam search. + end_id(int): The id of end token. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The LodTensor pair containing the generated id sequences \ + and the corresponding scores. The shapes and lods of the two \ + LodTensor are same. The lod level is 2 and the two levels \ + separately indicate how many hypotheses each source sentence has \ + and how many ids each hypothesis has. + + Examples: + .. code-block:: python + # Suppose `ids` and `scores` are LodTensorArray variables reserving + # the selected ids and scores of all steps + finished_ids, finished_scores = layers.beam_search_decode( + ids, scores, beam_size=5, end_id=0) + """ + helper = LayerHelper('beam_search_decode', **locals()) + sentence_ids = helper.create_tmp_variable(dtype=ids.dtype) + sentence_scores = helper.create_tmp_variable(dtype=ids.dtype) + + helper.append_op( + type="beam_search_decode", + inputs={"Ids": ids, + "Scores": scores}, + outputs={ + "SentenceIds": sentence_ids, + "SentenceScores": sentence_scores + }, + attrs={"beam_size": beam_size, + "end_id": end_id}) + + return sentence_ids, sentence_scores + + def lstm_unit(x_t, hidden_t_prev, cell_t_prev, @@ -2698,23 +2978,24 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): def reduce_mean(input, dim=None, keep_dim=False, name=None): """ - Computes the mean of tensor elements over the given dimension. + Computes the mean of the input tensor's elements along the given dimension. Args: input (Variable): The input variable which is a Tensor or LoDTensor. - dim (list|int|None): The dimensions along which the mean is computed. If - :attr:`None`, compute the mean over all elements of :attr:`input` - and return a Tensor variable with a single element, otherwise + dim (list|int|None): The dimension along which the mean is computed. If + `None`, compute the mean over all elements of :attr:`input` + and return a variable with a single element, otherwise it must be in the range :math:`[-rank(input), rank(input))`. If - :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. + :math:`dim[i] < 0`, the dimension to reduce is + :math:`rank(input) + dim[i]`. keep_dim (bool): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the :attr:`input` unless :attr:`keep_dim` is true. - name(str|None): A name for this layer(optional). If set None, the layer + name(str|None): A name for this layer(optional). If set `None`, the layer will be named automatically. Returns: - Variable: The reduced Tensor variable. + Variable: The reduced mean Variable. Examples: .. code-block:: python @@ -2936,7 +3217,7 @@ def split(input, num_or_sections, dim=-1, name=None): will be named automatically. Returns: - List: The list of segmented tensor variables. + list(Variable): The list of segmented tensor variables. Examples: .. code-block:: python @@ -2987,32 +3268,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes .. math:: - y = \frac{x}{ \sqrt{\sum {x^2} + epsion }} + + y = \\frac{x}{ \sqrt{\sum {x^2} + epsion }} For `x` with more dimensions, this layer independently normalizes each 1-D slice along dimension `axis`. Args: x(Variable|list): The input tensor to l2_normalize layer. - axis(int): The axis on which to apply normalization. If `axis < 0`, + axis(int): The axis on which to apply normalization. If `axis < 0`, \ the dimension to normalization is rank(X) + axis. -1 is the last dimension. - epsilon(float): The epsilon value is used to avoid division by zero, + epsilon(float): The epsilon value is used to avoid division by zero, \ the defalut value is 1e-10. - name(str|None): A name for this layer(optional). If set None, the layer + name(str|None): A name for this layer(optional). If set None, the layer \ will be named automatically. - Returns: - Variable: The output tensor variable. + Variable: The output tensor variable is the same shape with `x`. Examples: + .. code-block:: python - data = fluid.layers.data(name="data", - shape=(3, 17, 13), - dtype="float32") - normed = fluid.layers.l2_normalize(x=data, axis=1) + data = fluid.layers.data(name="data", + shape=(3, 17, 13), + dtype="float32") + normed = fluid.layers.l2_normalize(x=data, axis=1) """ if len(x.shape) == 1: @@ -3144,25 +3426,51 @@ def topk(input, k, name=None): This operator is used to find values and indices of the k largest entries for the last dimension. - If the input is a vector (rank=1), finds the k largest entries in the vector + If the input is a vector (1-D Tensor), finds the k largest entries in the vector and outputs their values and indices as vectors. Thus values[j] is the j-th largest entry in input, and its index is indices[j]. If the input is a Tensor with higher rank, this operator computes the top k entries along the last dimension. + For example: + + .. code-block:: text + + If: + input = [[5, 4, 2, 3], + [9, 7, 10, 25], + [6, 2, 10, 1]] + k = 2 + + Then: + The first output: + values = [[5, 4], + [10, 25], + [6, 10]] + + The second output: + indices = [[0, 1], + [2, 3], + [0, 2]] + Args: input(Variable): The input variable which can be a vector or Tensor with higher rank. - k(int): An integer value to specify the top k largest elements. + k(int): The number of top elements to look for along the last dimension + of input. name(str|None): A name for this layer(optional). If set None, the layer will be named automatically. + Default: None Returns: - values(Variable): The k largest elements along each last dimensional - slice. - indices(Variable): The indices of values within the last dimension of - input. + Tuple[Variable]: A tuple with two elements. Each element is a Variable. + The first one is k largest elements along each last + dimensional slice. The second one is indices of values + within the last dimension of input. + + Raises: + ValueError: If k < 1 or k is not less than the last dimension of input Examples: .. code-block:: python @@ -3170,7 +3478,7 @@ def topk(input, k, name=None): top5_values, top5_indices = layers.topk(input, k=5) """ shape = input.shape - if k < 1 and k >= shape[-1]: + if k < 1 or k >= shape[-1]: raise ValueError("k must be greater than 0 and less than %d." % (shape[-1])) @@ -3188,8 +3496,7 @@ def topk(input, k, name=None): return values, indices -def edit_distance(input, label, normalized=True, ignored_tokens=None, - name=None): +def edit_distance(input, label, normalized=True, ignored_tokens=None): """ EditDistance operator computes the edit distances between a batch of hypothesis strings and their references. Edit distance, also called @@ -3203,21 +3510,21 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, "kitten" -> "sitten" -> "sittin" -> "sitting" - Input(Hyps) is a LoDTensor consisting of all the hypothesis strings with + The input is a LoDTensor consisting of all the hypothesis strings with the total number denoted by `batch_size`, and the separation is specified by the LoD information. And the `batch_size` reference strings are arranged - in order in the same way in the LoDTensor Input(Refs). + in order in the same way in the input LoDTensor. - Output(Out) contains the `batch_size` results and each stands for the edit + The output contains the `batch_size` results and each stands for the edit distance for a pair of strings respectively. If Attr(normalized) is true, the edit distance will be divided by the length of reference string. Args: input(Variable): The indices for hypothesis strings. label(Variable): The indices for reference strings. - normalized(bool): Indicated whether to normalize the edit distance by + normalized(bool, default True): Indicated whether to normalize the edit distance by the length of reference string. - ignored_tokens(list of int): Tokens that should be removed before + ignored_tokens(list, default None): Tokens that should be removed before calculating edit distance. name (str): The name of this layer. It is optional. @@ -3229,7 +3536,6 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, x = fluid.layers.data(name='x', shape=[8], dtype='float32') y = fluid.layers.data(name='y', shape=[7], dtype='float32') - cost = fluid.layers.edit_distance(input=x,label=y) """ helper = LayerHelper("edit_distance", **locals()) @@ -3270,6 +3576,7 @@ def edit_distance(input, label, normalized=True, ignored_tokens=None, def ctc_greedy_decoder(input, blank, name=None): """ This op is used to decode sequences by greedy policy by below steps: + 1. Get the indexes of max value for each row in input. a.k.a. numpy.argmax(input, axis=0). 2. For each sequence in result of step1, merge repeated tokens between two @@ -3291,7 +3598,7 @@ def ctc_greedy_decoder(input, blank, name=None): [0.2, 0.2, 0.1, 0.5], [0.5, 0.1, 0.3, 0.1]] - input.lod = [[0, 4, 8]] + input.lod = [[4, 4]] Then: @@ -3299,7 +3606,7 @@ def ctc_greedy_decoder(input, blank, name=None): [1], [3]] - output.lod = [[0, 2, 3]] + output.lod = [[2, 1]] Args: @@ -3316,7 +3623,7 @@ def ctc_greedy_decoder(input, blank, name=None): Returns: Variable: CTC greedy decode result. If all the sequences in result were - empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1, 1]. + empty, the result LoDTensor will be [-1] with LoD [[]] and dims [1, 1]. Examples: .. code-block:: python @@ -3349,35 +3656,33 @@ def warpctc(input, label, blank=0, norm_by_times=False): input tensor. Args: - input(Variable): (LodTensor, default: LoDTensor), - the unscaled probabilities of variable-length sequences, - which is a 2-D Tensor with LoD information. - It's shape is [Lp, num_classes + 1], where Lp is the sum of all input - sequences' length and num_classes is the true number of classes. - (not including the blank label). - label(Variable): (LodTensor, default: LoDTensor), the ground truth - of variable-length sequence, which is a 2-D Tensor with LoD - information. It is of the shape [Lg, 1], where Lg is th sum of - all labels' length. - blank (int): default 0, the blank label index of Connectionist - Temporal Classification (CTC) loss, which is in the - half-opened interval [0, num_classes + 1). - norm_by_times (bool): default false, whether to normalize - the gradients by the number of time-step, which is also the - sequence's length. There is no need to normalize the gradients - if warpctc layer was follewed by a mean_op. + input (Variable): The unscaled probabilities of variable-length sequences, + which is a 2-D Tensor with LoD information. + It's shape is [Lp, num_classes + 1], where Lp is the sum of all input + sequences' length and num_classes is the true number of classes. + (not including the blank label). + label (Variable): The ground truth of variable-length sequence, + which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1], + where Lg is th sum of all labels' length. + blank (int, default 0): The blank label index of Connectionist + Temporal Classification (CTC) loss, which is in the + half-opened interval [0, num_classes + 1). + norm_by_times(bool, default false): Whether to normalize the gradients + by the number of time-step, which is also the sequence's length. + There is no need to normalize the gradients if warpctc layer was + follewed by a mean_op. Returns: Variable: The Connectionist Temporal Classification (CTC) loss, which is a 2-D Tensor of the shape [batch_size, 1]. Examples: + .. code-block:: python - y = layers.data( - name='y', shape=[11, 8], dtype='float32', lod_level=1) - y_predict = layers.data( - name='y_predict', shape=[11, 1], dtype='float32') - cost = layers.warpctc(input=y_predict, label=y) + + label = fluid.layers.data(shape=[11, 8], dtype='float32', lod_level=1) + predict = fluid.layers.data(shape=[11, 1], dtype='float32') + cost = fluid.layers.warpctc(input=predict, label=label) """ helper = LayerHelper('warpctc', **locals()) @@ -3407,16 +3712,20 @@ def sequence_reshape(input, new_dim): x is a LoDTensor: x.lod = [[0, 2, 6]] - x.data = [[1, 2], [3, 4], - [5, 6], [7, 8], [9, 10], [11, 12]] + x.data = [[1, 2], [3, 4], + [5, 6], [7, 8], + [9, 10], [11, 12]] x.dims = [6, 2] set new_dim = 4 then out is a LoDTensor: + out.lod = [[0, 1, 3]] - out.data = [[1, 2, 3, 4], - [5, 6, 7, 8], [9, 10, 11, 12]] + + out.data = [[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12]] out.dims = [3, 4] Currently, only 1-level LoDTensor is supported and please make sure @@ -3424,19 +3733,19 @@ def sequence_reshape(input, new_dim): no remainder for each sequence. Args: - input (Variable): (LodTensor, default: LoDTensor), a 2-D LoDTensor - with shape being [N, M] where M for dimension. - new_dim (int): New dimension which the input LoDTensor is reshaped to. + + input (Variable): A 2-D LoDTensor with shape being [N, M] where M for dimension. + new_dim (int): New dimension that the input LoDTensor is reshaped to. Returns: + Variable: Reshaped LoDTensor according to new dimension. Examples: .. code-block:: python - x = fluid.layers.data(name='x', shape=[5, 20], - dtype='float32', lod_level=1) - x_reshaped = layers.sequence_reshape(input=x, new_dim=10) + x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1) + x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10) """ helper = LayerHelper('sequence_reshape', **locals()) out = helper.create_tmp_variable(helper.input_dtype()) @@ -3466,13 +3775,41 @@ def nce(input, input (Variable): input variable. label (Variable): label. num_total_classes (int):${num_total_classes_comment} - sample_weight (int): ${sample_weight_comment} + sample_weight (Variable|None): A Variable of shape [batch_size, 1] + storing a weight for each sample. The default weight for each + sample is 1.0. param_attr (ParamAttr|None): attributes for parameter bias_attr (ParamAttr|None): attributes for bias num_neg_samples (int): ${num_neg_samples_comment} - + Returns: - Variable: output of nce layer. + Variable: The output nce loss. + + Examples: + .. code-block:: python + + window_size = 5 + words = [] + for i in xrange(window_size): + words.append(layers.data( + name='word_{0}'.format(i), shape=[1], dtype='int64')) + + dict_size = 10000 + label_word = int(window_size / 2) + 1 + + embs = [] + for i in xrange(window_size): + if i == label_word: + continue + + emb = layers.embedding(input=words[i], size=[dict_size, 32], + param_attr='emb.w', is_sparse=True) + embs.append(emb) + + embs = layers.concat(input=embs, axis=1) + loss = layers.nce(input=embs, label=words[label_word], + num_total_classes=dict_size, param_attr='nce.w', + bias_attr='nce.b') """ helper = LayerHelper('nce', **locals()) assert isinstance(input, Variable) @@ -3590,8 +3927,6 @@ def hsigmoid(input, label, num_classes=2, param_attr=None, bias_attr=None): def transpose(x, perm, name=None): """ - **transpose Layer** - Permute the dimensions of `input` according to `perm`. The `i`-th dimension of the returned tensor will correspond to the @@ -3681,8 +4016,6 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): Examples: - As an example: - .. code-block:: text Given: @@ -3724,9 +4057,9 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): output.dims = {8, 9} - output.lod = [[0, 4, 8]] + output.lod = [[4, 4]] - The simple usage is: + Examples: .. code-block:: python @@ -3759,29 +4092,13 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): return out +@templatedoc() def row_conv(input, future_context_size, param_attr=None, act=None): - """Row Conv Operator. This layer will apply lookahead convolution to - **input**. The input variable should be a 2D LoDTensor with shape [T, D]. - Parameters with shape [future_context_size + 1, D] will be created. The math - equation of row convolution is as follows: - - .. math:: - Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j} - - In the above equation: - - * :math:`Out_{i}`: The i-th row of output variable with shape [1, D]. - * :math:`\\tau`: Future context size. - * :math:`X_{j}`: The j-th row of input variable with shape [1, D]. - * :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D]. - - More details about row_conv please refer to the paper \ - (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and - the design document \ - (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645). + """ + ${comment} Args: - input (Variable): Input variable, a 2D LoDTensor with shape [T, D]. + input (${x_type}): ${x_comment}. future_context_size (int): Future context size. Please note, the shape of convolution kernel is [future_context_size + 1, D]. param_attr (ParamAttr): Attributes of parameters, including @@ -3789,14 +4106,13 @@ def row_conv(input, future_context_size, param_attr=None, act=None): act (str): Non-linear activation to be applied to output variable. Returns: - Variable: The output tensor with same shape as input tensor. + ${out_comment}. Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[16], - dtype='float32', lod_level=1) - out = fluid.layers.row_conv(input=x, future_context_size=2) + >>> import paddle.fluid as fluid + >>> x = fluid.layers.data(name='x', shape=[16], + >>> dtype='float32', lod_level=1) + >>> out = fluid.layers.row_conv(input=x, future_context_size=2) """ helper = LayerHelper('row_conv', **locals()) dtype = helper.input_dtype() @@ -3812,42 +4128,23 @@ def row_conv(input, future_context_size, param_attr=None, act=None): return helper.append_activation(out) +@templatedoc() def multiplex(inputs, index): """ - **Multiplex Layer** - - Referring to the given index variable, this layer selects rows from the - input variables to construct a multiplex variable. Assuming that there are - :math:`m` input variables and :math:`I_i` represents the i-th input - variable and :math:`i` is in [0, :math:`m`). All input variables are - tensors with same shape [:math:`d_0`, :math:`d_1`, ..., :math:`d_R`]. - Please note that rank of the input tensor should be at least 2. Each input - variable will be treated as a 2-D matrix with shape [:math:`M`, :math:`N`] - where :math:`M` for :math:`d_0` and :math:`N` for :math:`d_1` * :math:`d_2` - * ... * :math:`d_R`. Let :math:`I_i[j]` be the j-th row of the i-th input - variable. The given index variable should be a 2-D tensor with shape - [:math:`M`, 1]. Let `ID[i]` be the i-th index value of the index variable. - Then the output variable will be a tensor with shape [:math:`d_0`, - :math:`d_1`, ..., :math:`d_R`]. If we treat the output tensor as a 2-D - matrix with shape [:math:`M`, :math:`N`] and let :math:`O[i]` be the i-th - row of the matrix, then `O[i]` is equal to :math:`I_{ID[i]}[i]`. + ${comment} + + >>> import paddle.fluid as fluid + >>> x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32') + >>> x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') + >>> index = fluid.layers.data(name='index', shape=[1], dtype='int32') + >>> out = fluid.layers.multiplex(inputs=[x1, x2], index=index) Args: - inputs (list): A list of variables to gather from. All variables have the - same shape and the rank is at least 2. - index (Variable): Tensor, index variable which is a 2-D tensor - with shape [M, 1] where M is the batch size. + inputs (list): ${x_comment}. + index (${ids_type}): ${ids_comment}. Returns: - Variable: Multiplex variable gathered from input variables. - - Examples: - .. code-block:: python - - x1 = fluid.layers.data(name='x1', shape=[4], dtype='float32') - x2 = fluid.layers.data(name='x2', shape=[4], dtype='float32') - index = fluid.layers.data(name='index', shape=[1], dtype='int32') - out = fluid.layers.multiplex(inputs=[x1, x2], index=index) + ${out_comment}. """ helper = LayerHelper('multiplex', **locals()) @@ -3933,31 +4230,30 @@ def softmax_with_cross_entropy(logits, label, soft_label=False): def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): """ - **Smooth L1 Loss Operator. ** - - This operator computes the smooth L1 loss for X and Y. - The operator takes the first dimension of X and Y as batch size. + This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`. + It takes the first dimension of :attr:`x` and :attr:`y` as batch size. For each instance, it computes the smooth L1 loss element by element first - and then sums all the losses. So the shape of Out is [batch_size, 1]. + and then sums all the losses. So the shape of ouput Variable is + [batch_size, 1]. Args: x (Variable): A tensor with rank at least 2. The input value of smooth L1 loss op with shape [batch_size, dim1, ..., dimN]. y (Variable): A tensor with rank at least 2. The target value of smooth - L1 loss op with same shape as x. + L1 loss op with same shape as :attr:`x`. inside_weight (Variable|None): A tensor with rank at least 2. This - input is optional and should have same shape with x. If provided, - the result of (x - y) will be multiplied by this tensor element by - element. + input is optional and should have same shape with :attr:`x`. If + provided, the result of (:attr:`x` - :attr:`y`) will be multiplied + by this tensor element by element. outside_weight (Variable|None): A tensor with rank at least 2. This - input is optional and should have same shape with x. If provided, - the out smooth L1 loss will be multiplied by this tensor element - by element. - sigma (float|None): Hyper parameter of smooth L1 loss op. A float scalar - with default value 1.0. + input is optional and should have same shape with :attr:`x`. If + provided, the out smooth L1 loss will be multiplied by this tensor + element by element. + sigma (float|None): Hyper parameter of smooth L1 loss layer. A float + scalar with default value 1.0. + Returns: - Variable: A tensor with rank be 2. The output smooth L1 loss with - shape [batch_size, 1]. + Variable: The output smooth L1 loss with shape [batch_size, 1]. Examples: .. code-block:: python @@ -3968,6 +4264,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): fc = fluid.layers.fc(input=data, size=100) out = fluid.layers.smooth_l1(x=fc, y=label) """ + helper = LayerHelper('smooth_l1_loss', **locals()) diff = helper.create_tmp_variable(dtype=x.dtype) loss = helper.create_tmp_variable(dtype=x.dtype) @@ -3987,32 +4284,20 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): def one_hot(input, depth): """ - One Hot Operator. This operator creates the one-hot representations for input - index values. The following example will help to explain the function of this - operator. + This layer creates the one-hot representations for input indices. Args: - input(variable): A Tensor/LodTensor of indices, last dimension must be 1. - depth(scalar): an interger defining the depth of the one hot dimension. + input(Variable): Input indices, last dimension must be 1. + depth(scalar): An interger defining the depth of the one-hot dimension. Returns: - The one-hot tensor or LodTensor, same as input. + Variable: The one-hot representations of input. Examples: .. code-block:: python - X is a LoDTensor: - X.lod = [[0, 1, 4]] - X.shape = [4, 1] - X.data = [[1], [1], [3], [0]] - set depth = 4 - Out is a LoDTensor: - Out.lod = [[0, 1, 4]] - Out.shape = [4, 4] - Out.data = [[0., 1., 0., 0.], - [0., 1., 0., 0.], - [0., 0., 0., 1.], - [1., 0., 0., 0.]] + label = layers.data(name="label", shape=[1], dtype="float32") + one_hot_label = layers.one_hot(input=label, depth=10) """ helper = LayerHelper("one_hot", **locals()) one_hot_out = helper.create_tmp_variable(dtype='float32') @@ -4026,8 +4311,9 @@ def one_hot(input, depth): def autoincreased_step_counter(counter_name=None, begin=1, step=1): """ - NOTE: The counter will be automatically increased by 1 every mini-batch - Return the run counter of the main program, which is started with 1. + Create an auto-increase variable + which will be automatically increased by 1 every mini-batch + Return the run counter of the main program, default is started from 1. Args: counter_name(str): The counter name, default is '@STEP_COUNTER@'. @@ -4036,6 +4322,12 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): Returns: Variable: The global run counter. + + Examples: + .. code-block:: python + + global_step = fluid.layers.autoincreased_step_counter( + counter_name='@LR_DECAY_COUNTER@', begin=begin, step=1) """ helper = LayerHelper('global_step_counter') if counter_name is None: @@ -4105,14 +4397,18 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): say :attr:`actual_shape` has a higher priority than :attr:`shape`. act (str): The non-linear activation to be applied to output variable. - inplace(bool): If this flag is set true, a new output tensor is created - whose data is copied from input x, otherwise the output - shares data with input without copying. + inplace(bool): If this flag is set true, the output + shares data with input without copying, otherwise + a new output tensor is created + whose data is copied from input x. name (str): The name of this layer. It is optional. Returns: Variable: The output tensor. + Raises: + TypeError: if actual_shape is neither Variable nor None. + Examples: .. code-block:: python @@ -4124,6 +4420,11 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): if not (isinstance(shape, list) or isinstance(shape, tuple)): raise ValueError("Input shape must be a python lsit or tuple.") + inputs = {"X": x} + if isinstance(actual_shape, Variable): + inputs["Shape"] = actual_shape + elif actual_shape is not None: + raise TypeError("actual_shape should either be Variable or None") # Validate the shape unk_dim_idx = -1 @@ -4144,9 +4445,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): reshaped = helper.create_tmp_variable(dtype=x.dtype) helper.append_op( type="reshape", - inputs={"X": x, - "Shape": actual_shape} - if isinstance(actual_shape, Variable) else {"X": x}, + inputs=inputs, attrs={"shape": shape, "inplace": inplace}, outputs={"Out": reshaped}) @@ -4156,73 +4455,74 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): def lod_reset(x, y=None, target_lod=None): """ - LoD Reset Operator. Set LoD of **x** to a new one specified by **y** or - **target_lod**. When **y** provided, **y.lod** would be considered as target - LoD first, otherwise **y.data** would be considered as target LoD. If **y** - is not provided, target LoD should be specified by **target_lod**. - If target LoD is specified by **Y.data** or **target_lod**, only one level - LoD is supported. + Set LoD of :attr:`x` to a new one specified by :attr:`y` or + :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be + considered as target LoD first, otherwise :attr:`y.data` would be + considered as target LoD. If :attr:`y` is not provided, target LoD should + be specified by :attr:`target_lod`. If target LoD is specified by + :attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported. .. code-block:: text * Example 1: Given a 1-level LoDTensor x: - x.lod = [[ 0, 2, 5 6 ]] + x.lod = [[ 2, 3, 1 ]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.dims = [6, 1] - target_lod: [0, 4, 6] + target_lod: [4, 2] then we get a 1-level LoDTensor: - out.lod = [[ 0, 4, 6 ]] + out.lod = [[4, 2]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.dims = [6, 1] * Example 2: Given a 1-level LoDTensor x: - x.lod = [[ 0, 2, 5 6 ]] + x.lod = [[2, 3, 1]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.dims = [6, 1] y is a Tensor: - y.data = [[0, 2, 6]] + y.data = [[2, 4]] y.dims = [1, 3] then we get a 1-level LoDTensor: - out.lod = [[ 0, 2, 6 ]] + out.lod = [[2, 4]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.dims = [6, 1] * Example 3: Given a 1-level LoDTensor x: - x.lod = [[ 0, 2, 5 6 ]] + x.lod = [[2, 3, 1]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.dims = [6, 1] y is a 2-level LoDTensor: - y.lod = [[0, 2, 4], [0, 2, 5, 6]] + y.lod = [[2, 2], [2, 2, 1, 1]] y.data = [[1.1], [2.1], [3.1], [4.1], [5.1], [6.1]] y.dims = [6, 1] then we get a 2-level LoDTensor: - out.lod = [[0, 2, 4], [0, 2, 5, 6]] + out.lod = [[2, 2], [2, 2, 1, 1]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.dims = [6, 1] Args: x (Variable): Input variable which could be a Tensor or LodTensor. - y (Variable|None): If provided, output's LoD would be derived from y. + y (Variable|None): If provided, output's LoD would be derived + from :attr:`y`. target_lod (list|tuple|None): One level LoD which should be considered - as target LoD when y not provided. + as target LoD when :attr:`y` not provided. Returns: - Variable: Output variable with LoD specified by this operator. + Variable: Output variable with LoD specified by this layer. Raises: - ValueError: If y and target_lod are both None. + ValueError: If :attr:`y` and :attr:`target_lod` are both None. Examples: .. code-block:: python @@ -4258,9 +4558,7 @@ def lrn(input, n=5, k=1.0, alpha=1e-4, beta=0.75, name=None): .. math:: - Output(i, x, y) = Input(i, x, y) / \left( - k + \alpha \sum\limits^{\min(C, c + n/2)}_{j = \max(0, c - n/2)} - (Input(j, x, y))^2 \right)^{\beta} + Output(i, x, y) = Input(i, x, y) / \\left(k + \\alpha \\sum\\limits^{\\min(C, c + n/2)}_{j = \\max(0, c - n/2)}(Input(j, x, y))^2\\right)^{\\beta} In the above equation: @@ -4444,34 +4742,20 @@ def label_smooth(label, return smooth_label +@templatedoc() def roi_pool(input, rois, pooled_height=1, pooled_width=1, spatial_scale=1.0): """ - Region of interest pooling (also known as RoI pooling) is to perform - is to perform max pooling on inputs of nonuniform sizes to obtain - fixed-size feature maps (e.g. 7*7). - The operator has three steps: - 1. Dividing each region proposal into equal-sized sections with - the pooled_width and pooled_height - 2. Finding the largest value in each section - 3. Copying these max values to the output buffer + ${comment} Args: - input (Variable): The input for ROI pooling. - rois (Variable): ROIs (Regions of Interest) to pool over. It should - be a 2-D one level LoTensor of shape [num_rois, 4]. - The layout is [x1, y1, x2, y2], where (x1, y1) - is the top left coordinates, and (x2, y2) is the - bottom right coordinates. The num_rois is the - total number of ROIs in this batch data. - pooled_height (integer): The pooled output height. Default: 1 - pooled_width (integer): The pooled output width. Default: 1 - spatial_scale (float): Multiplicative spatial scale factor. To - translate ROI coords from their input scale - to the scale used when pooling. Default: 1.0 + input (Variable): ${x_comment} + rois (Variable): ROIs (Regions of Interest) to pool over. + pooled_height (integer): ${pooled_height_comment} Default: 1 + pooled_width (integer): ${pooled_width_comment} Default: 1 + spatial_scale (float): ${spatial_scale_comment} Default: 1.0 Returns: - pool_out (Variable): The output is a 4-D tensor of the shape - (num_rois, channels, pooled_h, pooled_w). + Variable: ${out_comment}. Examples: .. code-block:: python @@ -4543,12 +4827,13 @@ def image_resize(input, name=None, resample='BILINEAR'): """ - Resize a batch of images. + **Resize a Batch of Images** - The input must be a tensor of the shape (num_batches, channels, in_h, in_w), + The input must be a tensor of the shape (num_batches, channels, in_h, in_w), and the resizing only applies on the last two dimensions(hight and width). Supporting resample methods: + 'BILINEAR' : Bilinear interpolation Args: @@ -4568,8 +4853,8 @@ def image_resize(input, Default: 'BILINEAR' Returns: - out (Variable): The output is a 4-D tensor of the shape - (num_batches, channls, out_h, out_w). + Variable: The output is a 4-D tensor of the shape + (num_batches, channls, out_h, out_w). Examples: .. code-block:: python @@ -4640,9 +4925,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None): def image_resize_short(input, out_short_len, resample='BILINEAR'): """ - Resize a batch of images. The short edge of input images will be - resized to the given 'out_short_len'. The long edge of input images - will be resized proportionately to make images' length-width ratio + Resize a batch of images. The short edge of input images will be + resized to the given 'out_short_len'. The long edge of input images + will be resized proportionately to make images' length-width ratio constant. Args: @@ -4653,8 +4938,8 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): resample (str): resample method, default: BILINEAR. Returns: - out (Variable): The output is a 4-D tensor of the shape - (num_batches, channls, out_h, out_w). + Variable: The output is a 4-D tensor of the shape + (num_batches, channls, out_h, out_w). """ in_shape = input.shape if len(in_shape) != 4: @@ -4673,7 +4958,9 @@ def image_resize_short(input, out_short_len, resample='BILINEAR'): def gather(input, index): """ - Output is obtained by gathering entries of the outer-most dimension + **Gather Layer** + + Output is obtained by gathering entries of the outer-most dimension of X indexed by `index` and concatenate them together. .. math:: @@ -4698,7 +4985,7 @@ def gather(input, index): [5, 6]] Args: - input (Variable): The source input with rank>=1. + input (Variable): The source input with rank>=1. index (Variable): The index input with rank=1. Returns: @@ -4726,10 +5013,6 @@ def random_crop(x, shape, seed=None): """ ${comment} - Examples: - >>> img = fluid.layers.data("img", [3, 256, 256]) - >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) - Args: x(${x_type}): ${x_comment} shape(${shape_type}): ${shape_comment} @@ -4739,63 +5022,115 @@ def random_crop(x, shape, seed=None): Returns: ${out_comment} + Examples: + >>> img = fluid.layers.data("img", [3, 256, 256]) + >>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224]) """ helper = LayerHelper("random_crop", **locals()) - dtype = helper.input_dtype() + dtype = x.dtype out = helper.create_tmp_variable(dtype) if seed is None: seed = random.randint(-65536, 65535) - + op_attrs = {"shape": shape} if isinstance(seed, int): - seed_value = seed - seed = helper.create_tmp_variable(dtype="int64") - helper.append_op( - type="fill_constant", - inputs={}, - outputs={"Out": seed}, - attrs={ - "dtype": seed.dtype, - "shape": [1], - "value": float(seed_value) - }) + op_attrs["startup_seed"] = seed + seed = helper.create_variable( + name=unique_name.generate("random_crop_seed"), + dtype="int64", + persistable=True) elif not isinstance(seed, Variable): raise ValueError("'seed' must be a Variable or an int.") - seed_out = helper.create_tmp_variable(dtype="int64") helper.append_op( type="random_crop", inputs={"X": x, "Seed": seed}, outputs={"Out": out, - "SeedOut": seed_out}, - attrs={"shape": shape}) + "SeedOut": seed}, + attrs=op_attrs) + return out + + +def log(x): + """ + Calculates the natural log of the given input tensor, element-wise. + + .. math:: + + Out = \\ln(x) + + Args: + x (Variable): Input tensor. + + Returns: + Variable: The natural log of the input tensor computed element-wise. + + Examples: + + .. code-block:: python + + output = fluid.layers.log(x) + """ + helper = LayerHelper('log', **locals()) + dtype = helper.input_dtype(input_param_name='x') + out = helper.create_tmp_variable(dtype) + helper.append_op(type="log", inputs={"X": x}, outputs={"Out": out}) + return out + + +def relu(x): + """ + Relu takes one input data (Tensor) and produces one output data (Tensor) + where the rectified linear function, y = max(0, x), is applied to + the tensor elementwise. + + .. math:: + + Out = \\max(0, x) + + Args: + x (Variable): The input tensor. + + Returns: + Variable: The output tensor with the same shape as input. + + Examples: + + .. code-block:: python + + output = fluid.layers.relu(x) + """ + helper = LayerHelper('relu', **locals()) + dtype = helper.input_dtype(input_param_name='x') + out = helper.create_tmp_variable(dtype) + helper.append_op(type="relu", inputs={"X": x}, outputs={"Out": out}) return out def mean_iou(input, label, num_classes): """ Mean Intersection-Over-Union is a common evaluation metric for - semantic image segmentation, which first computes the IOU for each - semantic class and then computes the average over classes. - IOU is defined as follows: - + semantic image segmentation, which first computes the IOU for each + semantic class and then computes the average over classes. + IOU is defined as follows: + .. math:: - - IOU = true_positive / (true_positive + false_positive + false_negative). - The predictions are accumulated in a confusion matrix and mean-IOU + IOU = \\frac{true\_positiv}{(true\_positive + false\_positive + false\_negative)}. + + The predictions are accumulated in a confusion matrix and mean-IOU is then calculated from it. Args: input (Variable): A Tensor of prediction results for semantic labels with type int32 or int64. - label (Variable): A Tensor of ground truth labels with type int32 or int64. + label (Variable): A Tensor of ground truth labels with type int32 or int64. Its shape should be the same as input. + num_classes (int): The possible number of labels. Returns: mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1]. out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class. - out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. - + out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class. Examples: @@ -4810,12 +5145,110 @@ def mean_iou(input, label, num_classes): out_correct = helper.create_tmp_variable(dtype='int32') helper.append_op( type="mean_iou", - inputs={"predictions": input, - "labels": label}, + inputs={"Predictions": input, + "Labels": label}, outputs={ - "out_mean_iou": out_mean_iou, - "out_wrong": out_wrong, - "out_correct": out_correct + "OutMeanIou": out_mean_iou, + "OutWrong": out_wrong, + "OutCorrect": out_correct }, attrs={"num_classes": num_classes}) return out_mean_iou, out_wrong, out_correct + + +def crop(x, shape=None, offsets=None, name=None): + """ + Crop input into output, as specified by offsets and shape. + + .. code-block:: text + + * Case 1: + Given + X = [[0, 1, 2, 0, 0] + [0, 3, 4, 0, 0] + [0, 0, 0, 0, 0]], + and + shape = [2, 2], + offsets = [0, 1], + output is: + Out = [[1, 2], + [3, 4]]. + * Case 2: + Given + X = [[0, 1, 2, 5, 0] + [0, 3, 4, 6, 0] + [0, 0, 0, 0, 0]], + and shape is tensor + shape = [[0, 0, 0] + [0, 0, 0]] + and + offsets = [0, 1], + + output is: + Out = [[1, 2, 5], + [3, 4, 6]]. + + Args: + x (Variable): The input tensor variable. + shape (Variable|list/tuple of integer): The output shape is specified + by `shape`, which can a Variable or a list/tupe of integer. + If a tensor Variable, it's rank must be the same as `x`. This way + is suitable for the case that the output shape may be changed each + iteration. If a list/tupe of integer, it's length must be the same + as the rank of `x` + offsets (Variable|list/tuple of integer|None): Specifies the copping + offsets at each dimension. It can be a Variable or or a list/tupe + of integer. If a tensor Variable, it's rank must be the same as `x`. + This way is suitable for the case that the offsets may be changed + each iteration. If a list/tupe of integer, it's length must be the + same as the rank of `x`. If None, the offsets are 0 at each + dimension. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The cropped tensor variable. + + Raises: + ValueError: If shape is not a list, tuple or Variable. + + Examples: + + .. code-block:: python + + x = fluid.layers.data(name="x", shape=[3, 5], dtype="float32") + y = fluid.layers.data(name="y", shape=[2, 3], dtype="float32") + crop = fluid.layers.crop(x, shape=y) + + # or + z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32") + crop = fluid.layers.crop(z, shape=[2, 3]) + + """ + helper = LayerHelper('crop', **locals()) + + if not (isinstance(shape, list) or isinstance(shape, tuple) or \ + isinstance(shape, Variable)): + raise ValueError("The shape should be a list, tuple or Variable.") + + if offsets is None: + offsets = [0] * len(x.shape) + + out = helper.create_tmp_variable(x.dtype) + ipts = {'X': x} + attrs = {} + if isinstance(shape, Variable): + ipts['Y'] = shape + else: + attrs['shape'] = shape + if isinstance(offsets, Variable): + ipts['Offsets'] = offsets + else: + attrs['offsets'] = offsets + + helper.append_op( + type='crop', + inputs=ipts, + outputs={'Out': out}, + attrs=None if len(attrs) == 0 else attrs) + return out diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 98f169e8f0..9e97ec9a6f 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -17,7 +17,6 @@ __activations__ = [ 'sigmoid', 'logsigmoid', 'exp', - 'relu', 'tanh', 'tanh_shrink', 'softshrink', @@ -29,7 +28,6 @@ __activations__ = [ 'sin', 'round', 'reciprocal', - 'log', 'square', 'softplus', 'softsign', @@ -40,8 +38,6 @@ __activations__ = [ 'relu6', 'pow', 'stanh', - 'hard_shrink', - 'thresholded_relu', 'hard_sigmoid', 'swish', ] @@ -64,18 +60,102 @@ __all__ = [ 'logical_or', 'logical_xor', 'logical_not', - 'uniform_random', 'uniform_random_batch_size_like', 'gaussian_random', 'gaussian_random_batch_size_like', - 'cumsum', 'scatter', 'sum', 'slice', 'polygon_box_transform', 'shape', + 'iou_similarity', 'maxout', ] + __activations__ for _OP in set(__all__): globals()[_OP] = generate_layer_fn(_OP) + +__all__ += ["uniform_random"] + +_uniform_random_ = generate_layer_fn('uniform_random') + + +def uniform_random(shape, dtype=None, min=None, max=None, seed=None): + kwargs = dict() + for name in locals(): + val = locals()[name] + if val is not None: + kwargs[name] = val + return _uniform_random_(**kwargs) + + +uniform_random.__doc__ = _uniform_random_.__doc__ + """ +Examples: + + >>> result = fluid.layers.uniform_random(shape=[32, 784]) +""" + +__all__ += ['hard_shrink'] + +_hard_shrink_ = generate_layer_fn('hard_shrink') + + +def hard_shrink(x, threshold=None): + kwargs = dict() + for name in locals(): + val = locals()[name] + if val is not None: + kwargs[name] = val + return _hard_shrink_(**kwargs) + + +hard_shrink.__doc__ = _hard_shrink_.__doc__ + """ +Examples: + + >>> data = fluid.layers.data(name="input", shape=[784]) + >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3) +""" + +__all__ += ['cumsum'] + +_cum_sum_ = generate_layer_fn('cumsum') + + +def cumsum(x, axis=None, exclusive=None, reverse=None): + kwargs = dict() + for name in locals(): + val = locals()[name] + if val is not None: + kwargs[name] = val + + return _cum_sum_(**kwargs) + + +cumsum.__doc__ = _cum_sum_.__doc__ + """ +Examples: + + >>> data = fluid.layers.data(name="input", shape=[32, 784]) + >>> result = fluid.layers.cumsum(data, axis=0) +""" + +__all__ += ['thresholded_relu'] + +_thresholded_relu_ = generate_layer_fn('thresholded_relu') + + +def thresholded_relu(x, threshold=None): + kwargs = dict() + for name in locals(): + val = locals()[name] + if val is not None: + kwargs[name] = val + + _thresholded_relu_(**kwargs) + + +thresholded_relu.__doc__ = _thresholded_relu_.__doc__ + """ +Examples: + + >>> data = fluid.layers.data(name="input", shape=[1]) + >>> result = fluid.layers.thresholded_relu(data, threshold=0.4) +""" diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 62b01d595a..b6614ecf3b 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -6,7 +6,7 @@ # # http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software +# Unlessf required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and @@ -33,12 +33,32 @@ __all__ = [ 'fill_constant', 'argmin', 'argmax', + 'argsort', 'ones', 'zeros', + 'reverse', ] def create_tensor(dtype, name=None, persistable=False): + """ + Create an variable, which will hold a LoDTensor with data type dtype. + + Args: + dtype(string): 'float32'|'int32'|..., the data type of the + created tensor. + name(string): The name of the created tensor, if not set, + the name will be a random unique one. + persistable(bool): Set the persistable flag of the create tensor. + + Returns: + Variable: The tensor variable storing the created tensor. + + Examples: + .. code-block:: python + + tensor = fluid.layers.create_tensor(dtype='float32') + """ helper = LayerHelper("create_tensor", **locals()) return helper.create_variable( name=helper.name, dtype=dtype, persistable=persistable) @@ -51,7 +71,12 @@ def create_parameter(shape, is_bias=False, default_initializer=None): """ - Create a parameter + Create a parameter. The parameter is a learnable variable, which can have + gradient, and can be optimized. + + NOTE: this is a very low-level API. This API is useful when you create + operator by your self. instead of using layers. + Args: shape(list[int]): shape of the parameter dtype(string): element type of the parameter @@ -63,7 +88,12 @@ def create_parameter(shape, default_initializer(Initializer): initializer for the parameter Returns: - Parameter: the created parameter + the created parameter. + + Examples: + >>> W = fluid.layers.create_parameter(shape=[784, 200], dtype='float32') + >>> data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False) + >>> hidden = fluid.layers.matmul(x=data, y=W) """ helper = LayerHelper("create_parameter", **locals()) if attr is None: @@ -79,16 +109,29 @@ def create_global_var(shape, force_cpu=False, name=None): """ - Create a global variable. such as global_step + Create a new variable in the global block(block 0). + Args: shape(list[int]): shape of the variable - value(float): the value of the variable - dtype(string): element type of the parameter - persistable(bool): if this variable is persistable - force_cpu(bool): force this variable to be on CPU + value(float): the value of the variable. The new created + variable will be filled with it. + dtype(string): data type of the variable + persistable(bool): if this variable is persistable. + Default: False + force_cpu(bool): force this variable to be on CPU. + Default: False + name(str|None): The name of the variable. If set to None the variable + name will be generated automatically. + Default: None Returns: Variable: the created Variable + + Examples: + .. code-block:: python + + var = fluid.create_global_var(shape=[2,3], value=1.0, dtype='float32', + persistable=True, force_cpu=True, name='new_var') """ helper = LayerHelper("global_var", **locals()) var = helper.create_global_variable( @@ -101,8 +144,21 @@ def create_global_var(shape, def cast(x, dtype): """ - This function takes in the input with input_dtype - and casts it to the output_dtype as the output. + This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts + it to the output with :attr:`dtype`. + + Args: + x (Variable): The input Variable for casting. + dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output Variable. + + Returns: + Variable: The output Variable after casting. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name='x', shape=[13], dtype='float32') + result = fluid.layers.cast(x=data, dtype='float64') """ helper = LayerHelper('cast', **locals()) out = helper.create_tmp_variable(dtype=dtype) @@ -133,7 +189,8 @@ def concat(input, axis=0, name=None): Examples: .. code-block:: python - out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) + + out = fluid.layers.concat(input=[Efirst, Esecond, Ethird, Efourth]) """ helper = LayerHelper('concat', **locals()) out = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -146,19 +203,21 @@ def concat(input, axis=0, name=None): def sums(input, out=None): - """This function performs the sum operation on the input and returns the + """ + This function performs the sum operation on the input and returns the result as the output. Args: input (Variable|list): The input tensor that has the elements that need to be summed up. + out (Variable|None): Output parameter. The sum result. + Default: None Returns: - Variable: The tensor type variable that has the sum of input - written to it. + Variable: the sum of input. The same as the argument 'out' Examples: - .. code-block::python + .. code-block:: python tmp = fluid.layers.zeros(shape=[10], dtype='int32') i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10) @@ -172,11 +231,15 @@ def sums(input, out=None): helper = LayerHelper('sum', **locals()) if out is None: out = helper.create_tmp_variable(dtype=helper.input_dtype()) - helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out}) + helper.append_op( + type='sum', + inputs={'X': input}, + outputs={'Out': out}, + attrs={'use_mkldnn': False}) return out -def assign(input, output): +def assign(input, output=None): """ **Assign** @@ -184,18 +247,21 @@ def assign(input, output): Args: input(Variable|numpy.ndarray): The source variable - output(Variable): The destination variable + output(Variable|None): The destination variable Returns: Variable: The destination variable that was supplied as the *output*. Examples: .. code-block:: python + out = fluid.layers.create_tensor(dtype='float32') hidden = fluid.layers.fc(input=data, size=10) fluid.layers.assign(hidden, out) """ helper = LayerHelper('assign', **locals()) + if output is None: + output = helper.create_tmp_variable(dtype=input.dtype) if isinstance(input, Variable): helper.append_op( type='assign', inputs={'X': [input]}, outputs={'Out': [output]}) @@ -321,22 +387,22 @@ def argmin(x, axis=0): """ **argmin** - This function computes the indices of the min elements + This function computes the indices of the min elements of the input tensor's element along the provided axis. Args: x(Variable): The input to compute the indices of the min elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. code-block:: python - + out = fluid.layers.argmin(x=in, axis=0) - out = fluid.layers.argmin(x=in, axis=-1) + out = fluid.layers.argmin(x=in, axis=-1) """ helper = LayerHelper("arg_min", **locals()) out = helper.create_tmp_variable(VarDesc.VarType.INT64) @@ -352,22 +418,22 @@ def argmax(x, axis=0): """ **argmax** - This function computes the indices of the max elements + This function computes the indices of the max elements of the input tensor's element along the provided axis. Args: x(Variable): The input to compute the indices of the max elements. axis(int): Axis to compute indices along. - + Returns: Variable: The tensor variable storing the output - + Examples: .. code-block:: python - + out = fluid.layers.argmax(x=in, axis=0) - out = fluid.layers.argmax(x=in, axis=-1) + out = fluid.layers.argmax(x=in, axis=-1) """ helper = LayerHelper("arg_max", **locals()) out = helper.create_tmp_variable(VarDesc.VarType.INT64) @@ -379,6 +445,58 @@ def argmax(x, axis=0): return out +def argsort(input, axis=-1, name=None): + """ + Performs sorting on the input Variable along the given axis, and outputs + sorted data Varibale and its corresponding index Variable with the same + shape as :attr:`input`. + + .. code-block:: text + + For example, the given axis is -1 and the input Variable + + input = [[0.15849551, 0.45865775, 0.8563702 ], + [0.12070083, 0.28766365, 0.18776911]], + + after argsort, the sorted Vairable becomes + + out = [[0.15849551, 0.45865775, 0.8563702 ], + [0.12070083, 0.18776911, 0.28766365]], + + and the sorted indices along the given axis turn outs to be + + indices = [[0, 1, 2], + [0, 2, 1]] + + Args: + input(Variable): The input Variable for sorting. + axis(int): The axis along which to sort the input Variable. When + :attr:`axis` < 0, the actual axis will be :attr:`axis` + + rank(:attr:`input`). Default -1, the last dimension. + name(str|None): (optional) A name for this layer. If set None, the + layer will be named automatically. + + Returns: + tuple: A tuple of sorted data Variable and the sorted indices. + + Examples: + .. code-block:: python + + input = fluid.layers.data(data=[2, 3]) + out, indices = fluid.layers.argsort(input, axis=0) + """ + helper = LayerHelper("argsort", **locals()) + out = helper.create_tmp_variable(dtype=input.dtype, stop_gradient=True) + ids = helper.create_tmp_variable(VarDesc.VarType.INT64, stop_gradient=True) + helper.append_op( + type='argsort', + inputs={'X': input}, + outputs={'Out': out, + 'Indices': ids}, + attrs={'axis': axis}) + return out, ids + + def ones(shape, dtype, force_cpu=False): """ **ones** @@ -413,11 +531,12 @@ def zeros(shape, dtype, force_cpu=False): It also sets *stop_gradient* to True. Args: - shape(tuple|list|None): Shape of output tensor - dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor + shape(tuple|list|None): Shape of output tensor. + dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor. + force_cpu(bool, default False): Whether to make output stay on CPU. Returns: - Variable: The tensor variable storing the output + Variable: The tensor variable storing the output. Examples: .. code-block:: python @@ -435,9 +554,9 @@ def reverse(x, axis): Args: x(Vairbale): the input to be reversed. - axis(int|tuple|list): Axis that along which order of elements - is reversed. If it is a tuple or a list, reversing - will be apply on each axis in the tuple or list. + axis(int|tuple|list): Axis that along which order of elements + is reversed. If it is a tuple or a list, reversing + will be apply on each axis in the tuple or list. Returns: Variable: The reversed tensor. @@ -468,9 +587,9 @@ def save(x, file_path, overwrite=True): Args: x(variable): The Tensor/LoDTensor to be saved. file_path(str): The file path where the variable will be saved. - overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. + overwrite(bool): Whether or not cover the given file when it has already + existed. If it's set 'False' and the file is existed, a runtime + error will be thrown. """ helper = LayerHelper("save", **locals()) helper.append_op( @@ -486,11 +605,27 @@ def save_combine(x, file_path, overwrite=True): Saves a list of variables into a single file. Args: - x(list): A list of Tensor/LoDTensor to be saved together in a single file. + x(list): A list of Tensor/LoDTensor variables to be saved together in + a single file. file_path(str): The file path where variables will be saved. - overwrite(bool): Whether or not cover the given file when it has already - existed. If it's set 'False' and the file is existed, a runtime - error will be thrown. + overwrite(bool): Whether or not cover the given file when it has already + existed. If it's set 'False' and the file is existed, a runtime + error will be thrown. + + Returns: + There is no return value. + + Examples: + + .. code-block:: python + + v1 = fluid.layers.data(name="data", + shape=(4, 6), + dtype="float32") + v2 = fluid.layers.data(name="data", + shape=(6, 8, 4), + dtype="float32") + normed = fluid.layers.save_combine([v1, v2], file_path="output") """ helper = LayerHelper("save_combine", **locals()) helper.append_op( diff --git a/python/paddle/fluid/lod_tensor.py b/python/paddle/fluid/lod_tensor.py index 9946d0a4ff..b2b3186c1e 100644 --- a/python/paddle/fluid/lod_tensor.py +++ b/python/paddle/fluid/lod_tensor.py @@ -18,172 +18,117 @@ import numpy as np __all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] -def _validate_lod(lod, tensor_height=-1): - """Check whether the input length-based lod info is valid. - - There are several things to check: - 1. lod should be a list of lists. Empty list is fine. - 2. The length of each sublist (a lod level) should be at least one. - 3. Each element in each lod level should be an integer greater than 0. - 4. The sum of one lod level should be equal to the length of the next lod level. - 5. The sum of the last lod level should be equal to the tensor height. - Bypass this check if user does not provide tensor_height as input. - - Args: - lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]]. - tensor_height: the outermost dimension of the tensor with which the input - lod is associated with. - - Returns: - A boolean indicating whether the input lod is valid or not. +def create_lod_tensor(data, recursive_seq_lens, place): """ - assert isinstance(lod, list), "lod should be a list" - # Empty lod is fine - if len(lod) == 0: - return True - - lod_sum = [] - for level in lod: - assert isinstance(level, list), "each item in lod should be a list" - # Each level of lod should have at least one length info - if len(level) < 1: - return False - level_sum = 0 - for lod_len in level: - # Each length in a level should be > 0 - if lod_len <= 0: - return False - level_sum += lod_len - lod_sum.append(level_sum) - - for idx, val in enumerate(lod_sum[:-1]): - # Each level's sum should be equal to - # the number of items in the next level - if val != len(lod[idx + 1]): - return False - - if tensor_height == -1: - return True - else: - # Last level's sum should be equal to the tensor height - return lod_sum[-1] == tensor_height - - -def _convert_lod(lod): - """Convert a length-based lod to a offset-based lod. - - If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]], - then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]]. - - Args: - lod: a length-based lod info. + Create a lod tensor from a numpy array, a list, or an existing lod tensor. - Returns: - A list of lists as the offset-based lod converted to from the input lod. - """ - new_lod = [] - for level in lod: - cur_len = 0 - new_level = [cur_len] - for lod_len in level: - cur_len += lod_len - new_level.append(cur_len) - new_lod.append(new_level) - return new_lod + Create a lod tensor by doing the following: + 1. Check that the length-based level of detail (LoD) also known as + recursive_sequence_lengths of the input is valid. -def create_lod_tensor(data, lod, place): - """Create a lod tensor from a numpy array, a list, or an existing lod tensor. + 2. Convert recursive_sequence_lengths to a offset-based LoD. - Create a lod tensor by doing the following: - 1. Check that the length-based input lod is valid. - 2. Convert the length-based lod to a offset-based LoD. - 3. Copy the data from a numpy array, a list or a existing lod tensor to + 3. Copy the data from a numpy array, a list or a existing lod tensor to CPU or GPU device (based on input place). + 4. Set the level of detail (LoD) using the offset-based LoD. - Use example: - Suppose we want LoDTensor to hold data for sequences of word, where each word is - represented by an integer. If we want to create a LoDTensor to represent two - sentences, one of 2 words, and one of 3 words. + Examples: - Then 'data' can be a numpy array of integers with shape (5, 1). - 'lod' will be [[2, 3]], indicating the length(# of words) in each sentence. - This length-based input lod [[2, 3]] will be converted to offset-based lod [[0, 2, 5]] - inside the function call. + Suppose we want LoDTensor to hold data for sequences of word, where each + word is represented by an integer. If we want to create a LoDTensor to + represent two sentences, one of 2 words, and one of 3 words. - Please refer to - github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md - for more details regarding LoD. + Then :code:`data` can be a numpy array of integers with shape (5, 1). + :code:`recursive_seq_lens` will be [[2, 3]], indicating the length(# of words) in each + sentence. This length-based :code:`recursive_seq_lens` [[2, 3]] will be converted to + offset-based LoD [[0, 2, 5]] inside the function call. + + Please reference :ref:`api_guide_low_level_lod_tensor` for more details + regarding LoD. Args: - data: a numpy array or a LoDTensor or a list holding the data to be copied. - lod: a list of lists indicating the length-based LoD info specified by the user. - place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. + data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a + list holding the data to be copied. + recursive_seq_lens(list): a list of lists indicating the length-based level of detail + info specified by the user. + place(Place): CPU or GPU place indicating where the data in the new + LoDTensor will be stored. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ if isinstance(data, core.LoDTensor): - return create_lod_tensor(np.array(data), lod, place) + return create_lod_tensor(np.array(data), recursive_seq_lens, place) elif isinstance(data, list): # When input data is a list, it only deal with the case where the base element # is an index of shape [1] and dtype int64 (e.g., word id). Hence, the generated # LoDTensor will be of shape [n, 1] and dtype int64, where `n` is the total number # of words or other indexes in the sequence. - new_lod = [] + new_recursive_seq_lens = [] for seq in data: - new_lod.append(len(seq)) - assert [new_lod] == lod, "data and lod do not match" + new_recursive_seq_lens.append(len(seq)) + assert [ + new_recursive_seq_lens + ] == recursive_seq_lens, "data and recursive_seq_lens do not match" flattened_data = np.concatenate(data, axis=0).astype("int64") flattened_data = flattened_data.reshape([len(flattened_data), 1]) - return create_lod_tensor(flattened_data, lod, place) + return create_lod_tensor(flattened_data, recursive_seq_lens, place) elif isinstance(data, np.ndarray): - assert _validate_lod(lod, - data.shape[0]), "the provided lod info is invalid" tensor = core.LoDTensor() tensor.set(data, place) - tensor.set_lod(_convert_lod(lod)) + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + assert tensor.has_valid_recursive_sequence_lengths( + ), "the provided lod info is invalid" return tensor else: raise TypeError( "data should be either a LoDTensor, a Numpy array or a list") -def create_random_int_lodtensor(lod, base_shape, place, low, high): - """Create a LoDTensor containing random integers. +def create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, + high): + """ + Create a LoDTensor containing random integers. - This function is frequently used in the book examples. So we revised it based on - the new create_lod_tensor API and put it here in the lod_tensor module to simplify - the code. + This function is frequently used in the book examples. So we revised it + based on the new create_lod_tensor API and put it here in the lod_tensor + module to simplify the code. The function does the following: - 1. Calculate the overall shape of the LoDTensor based on the length-based 'lod' input - and the shape of the basic element in 'base_shape'. + + 1. Calculate the overall shape of the LoDTensor based on the length-based + :code:`recursive_seq_lens` input and the shape of the basic element in + :code:`base_shape`. + 2. Create a numpy array of this shape. + 3. Create the LoDTensor using create_lod_tensor API. - Suppose we want LoDTensor to hold data for sequences of word, where each word is - represented by an integer. If we want to create a LoDTensor to represent two - sentences, one of 2 words, and one of 3 words. Then 'base_shape' is [1], input - length-based 'lod' is [[2, 3]]. Then the overall shape of the LoDTensor would be - [5, 1], holding 5 words for two sentences. + Suppose we want LoDTensor to hold data for sequences of word, where each + word is represented by an integer. If we want to create a LoDTensor to + represent two sentences, one of 2 words, and one of 3 words. Then + 'base_shape' is [1], input length-based 'recursive_seq_lens' is [[2, 3]]. + Then the overall shape of the LoDTensor would be [5, 1], holding 5 words + for two sentences. Args: - data: a numpy array or a LoDTensor holding the data to be copied. - lod: a list of lists indicating the length-based LoD info specified by the user. - base_shape: the shape of the basic element to be held by the LoDTensor. - place: CPU or GPU place indicating where the data in the new LoDTensor will be stored. - low: the lower bound of the random integers. - high: the upper bound of the random integers. + recursive_seq_lens(list): a list of lists indicating the length-based + level of detail info specified by the user. + base_shape(list): the shape of the basic element to be held by the + LoDTensor. + place(Place): CPU or GPU place indicating where the data in the new + LoDTensor will be stored. + low(int): the lower bound of the random integers. + high(int): the upper bound of the random integers. Returns: - A fluid LoDTensor object with tensor data and lod info. + A fluid LoDTensor object with tensor data and recursive_seq_lens info. """ assert isinstance(base_shape, list), "base_shape should be a list" - converted_lod = _convert_lod(lod) # append the total number of basic elements to the front of its shape - overall_shape = [converted_lod[-1][-1]] + base_shape + overall_shape = [sum(recursive_seq_lens[-1])] + base_shape # the range of integer data elements is [low, high] data = np.random.random_integers(low, high, overall_shape).astype("int64") - return create_lod_tensor(data, lod, place) + return create_lod_tensor(data, recursive_seq_lens, place) diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index bb9c6fdc60..17bb0826a6 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -23,6 +23,8 @@ import warnings __all__ = [ 'MetricBase', 'CompositeMetric', + 'Precision', + 'Recall', 'Accuracy', 'ChunkEvaluator', 'EditDistance', @@ -46,33 +48,34 @@ def _is_number_or_matrix_(var): class MetricBase(object): """ - Base Class for all evaluators + Base Class for all Metrics. + MetricBase define a group of interfaces for the + model evaluation methods. Metrics accumulate metric states between + consecutive minibatches, at every minibatch, use update + interface to add current minibatch value to global states. + Use eval to compute accumative metric value from last reset() + or from scratch on. + If you need to custom a new metric, please inherit from MetricBase and + custom implementation. Args: - name(str): The name of evaluator. such as, "accuracy". Used for generate - temporary variable name. - Interface: - Note(*) : the states is the attributes who not has _ prefix. - - get_config(): print current states and configuration - reset(): clear the states. If the Metrics states type is not (int, float, np.ndarray), - Please override this method. - update(): update states at every minibatch - eval(): get metric evaluation in numpy type. + name(str): The name of metric instance. such as, "accuracy". + It needed if you want to distinct different metrics in a model. + """ - def __init__(self, name, **kwargs): + def __init__(self, name): self._name = str(name) if name != None else self.__class__.__name__ - self._kwargs = kwargs if kwargs != None else dict() - self.reset() def __str__(self): return self._name def reset(self): """ - states is the attributes who not has _ prefix. - reset the states of metrics. + reset clear the states of metrics. By default, the states + are the members who do not has _ prefix, reset set them to inital states. + If you violate the implicit name rule, please also custom the reset + interface. """ states = { attr: value @@ -90,61 +93,231 @@ class MetricBase(object): setattr(self, attr, None) def get_config(self): + """ + Get the metric and current states. + The states are the members who do not has "_" prefix. + + Args: + None + + Returns: + dict: a dict of metric and states + """ states = { attr: value for attr, value in self.__dict__.iteritems() if not attr.startswith("_") } - config = copy.deepcopy(self._kwargs) + config = {} config.update({"name": self._name, "states": copy.deepcopy(states)}) return config - def update(self): - raise NotImplementedError() + def update(self, preds, labels): + """ + Updates the metric states at every minibatch. + One user can compute the minibatch metric via pure Python, or + via a c++ operator. + + Args: + preds(numpy.array): the predictions of current minibatch + labels(numpy.array): the labels of current minibatch, if the label is one-hot + or soft-label, should custom the corresponding update rule. + """ + raise NotImplementedError( + "Should not use it directly, please extend it.") def eval(self): - raise NotImplementedError() + """ + Evalute the current metrics based the accumulated states. + + Returns: + float|list(float)|numpy.array: the metrics via Python. + """ + raise NotImplementedError( + "Should not use it directly, please extend it.") class CompositeMetric(MetricBase): """ - Compute multiple metrics in each minibatch. + Composite multiple metrics in one instance. for example, merge F1, accuracy, recall into one Metric. + + Examples: + .. code-block:: python + + labels = fluid.layers.data(name="data", shape=[1], dtype="int32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32") + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + comp = fluid.metrics.CompositeMetric() + acc = fluid.metrics.Precision() + recall = fluid.metrics.Recall() + comp.add_metric(acc) + comp.add_metric(recall) + for pass in range(PASSES): + comp.reset() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + comp.update(preds=preds, labels=labels) + numpy_acc, numpy_recall = comp.eval() """ - def __init__(self, name=None, **kwargs): - super(CompositeMetric, self).__init__(name, kwargs) + def __init__(self, name=None): + super(CompositeMetric, self).__init__(name) self._metrics = [] def add_metric(self, metric): + """ + add one metric instance to CompositeMetric. + + Args: + metric: a instance of MetricBase. + """ if not isinstance(metric, MetricBase): raise ValueError("SubMetric should be inherit from MetricBase.") self._metrics.append(metric) + def update(self, preds, labels): + """ + Update every metrics in sequence. + + Args: + preds(numpy.array): the predictions of current minibatch + labels(numpy.array): the labels of current minibatch, if the label is one-hot + or soft-label, should custom the corresponding update rule. + """ + for m in self._metrics: + ans.append(m.update(preds, labels)) + def eval(self): + """ + Evaluate every metrics in sequence. + + Returns: + list(float|numpy.array): a list of metrics value in Python. + """ ans = [] for m in self._metrics: ans.append(m.eval()) return ans +class Precision(MetricBase): + """ + Precision (also called positive predictive value) is the fraction of + relevant instances among the retrieved instances. + https://en.wikipedia.org/wiki/Evaluation_of_binary_classifiers + + Note Precision is different with Accuracy in binary classifiers. + accuracy = true positive / total instances + precision = true positive / all positive instance + + Examples: + .. code-block:: python + + metric = fluid.metrics.Precision() + for pass in range(PASSES): + metric.reset() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds=preds, labels=labels) + numpy_precision = metric.eval() + """ + + def __init__(self, name=None): + super(Precision, self).__init__(name) + self.tp = 0 # true positive + self.fp = 0 # false positive + + def update(self, preds, labels): + if not _is_numpy_(preds): + raise ValueError("The 'preds' must be a numpy ndarray.") + if not _is_numpy_(labels): + raise ValueError("The 'labels' must be a numpy ndarray.") + sample_num = labels[0] + for i in range(sample_num): + pred = preds[i].astype("int32") + label = labels[i] + if label == 1: + if pred == label: + self.tp += 1 + else: + self.fp += 1 + + def eval(self): + ap = self.tp + self.fp + return float(self.tp) / ap if ap != 0 else .0 + + +class Recall(MetricBase): + """ + Recall (also known as sensitivity) is the fraction of + relevant instances that have been retrieved over the + total amount of relevant instances + + https://en.wikipedia.org/wiki/Precision_and_recall + + Examples: + .. code-block:: python + + metric = fluid.metrics.Recall() + for pass in range(PASSES): + metric.reset() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds=preds, labels=labels) + numpy_recall = metric.eval() + """ + + def __init__(self, name=None): + super(Recall, self).__init__(name) + self.tp = 0 # true positive + self.fn = 0 # false negtive + + def update(self, preds, labels): + if not _is_numpy_(preds): + raise ValueError("The 'preds' must be a numpy ndarray.") + if not _is_numpy_(labels): + raise ValueError("The 'labels' must be a numpy ndarray.") + sample_num = labels[0] + for i in range(sample_num): + pred = preds[i].astype("int32") + label = labels[i] + if label == 1: + if pred == label: + self.tp += 1 + else: + if pred != label: + self.fn += 1 + + def eval(self): + recall = self.tp + self.fn + return float(self.tp) / recall if recall != 0 else .0 + + class Accuracy(MetricBase): """ Accumulate the accuracy from minibatches and compute the average accuracy for every pass. + https://en.wikipedia.org/wiki/Accuracy_and_precision Args: name: the metrics name - Example: - minibatch_accuracy = fluid.layers.accuracy(pred, label) - accuracy_evaluator = fluid.metrics.Accuracy() - for epoch in PASS_NUM: - accuracy_evaluator.reset() - for data in batches: - loss = exe.run(fetch_list=[cost, minibatch_accuracy]) - accuracy_evaluator.update(value=minibatch_accuracy, weight=batches) - accuracy = accuracy_evaluator.eval() + Examples: + .. code-block:: python + + labels = fluid.layers.data(name="data", shape=[1], dtype="int32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32") + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + minibatch_accuracy = fluid.layers.accuracy(pred, label) + accuracy_evaluator = fluid.metrics.Accuracy() + for pass in range(PASSES): + accuracy_evaluator.reset() + for data in train_reader(): + batch_size = data[0] + loss = exe.run(fetch_list=[cost, minibatch_accuracy]) + accuracy_evaluator.update(value=minibatch_accuracy, weight=batch_size) + numpy_acc = accuracy_evaluator.eval() """ def __init__(self, name=None): @@ -153,6 +326,13 @@ class Accuracy(MetricBase): self.weight = .0 def update(self, value, weight): + """ + Update minibatch states. + + Args: + value(float|numpy.array): accuracy of one minibatch. + weight(int|float): batch size. + """ if not _is_number_or_matrix_(value): raise ValueError( "The 'value' must be a number(int, float) or a numpy ndarray.") @@ -163,9 +343,8 @@ class Accuracy(MetricBase): def eval(self): if self.weight == 0: - raise ValueError( - "There is no data in Accuracy Metrics. Please check layers.accuracy output has added to Accuracy." - ) + raise ValueError("There is no data in Accuracy Metrics. \ + Please check layers.accuracy output has added to Accuracy.") return self.value / self.weight @@ -174,6 +353,25 @@ class ChunkEvaluator(MetricBase): Accumulate counter numbers output by chunk_eval from mini-batches and compute the precision recall and F1-score using the accumulated counter numbers. + For some basics of chunking, please refer to + 'Chunking with Support Vector Machines '. + ChunkEvalEvaluator computes the precision, recall, and F1-score of chunk detection, + and supports IOB, IOE, IOBES and IO (also known as plain) tagging schemes. + + Examples: + .. code-block:: python + + labels = fluid.layers.data(name="data", shape=[1], dtype="int32") + data = fluid.layers.data(name="data", shape=[32, 32], dtype="int32") + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + precision, recall, f1_score, num_infer_chunks, num_label_chunks, num_correct_chunks = layers.chunk_eval( + input=pred, + label=label) + metric = fluid.metrics.ChunkEvaluator() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks) + numpy_precision, numpy_recall, numpy_f1 = metric.eval() """ def __init__(self, name=None): @@ -183,9 +381,17 @@ class ChunkEvaluator(MetricBase): self.num_correct_chunks = 0 def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks): + """ + Update the states based on the layers.chunk_eval() ouputs. + Args: + num_infer_chunks(int|numpy.array): The number of chunks in Inference on the given minibatch. + num_label_chunks(int|numpy.array): The number of chunks in Label on the given mini-batch. + num_correct_chunks(int|float|numpy.array): The number of chunks both in Inference and Label on the + given mini-batch. + """ if not _is_number_or_matrix_(num_infer_chunks): raise ValueError( - "The 'num_infer_chunks' must be a number(int, float) or a numpy ndarray." + "The 'num_infer_chunks' must be a number(int) or a numpy ndarray." ) if not _is_number_or_matrix_(num_label_chunks): raise ValueError( @@ -212,21 +418,28 @@ class ChunkEvaluator(MetricBase): class EditDistance(MetricBase): """ + Edit distance is a way of quantifying how dissimilar two strings + (e.g., words) are to one another by counting the minimum number + of operations required to transform one string into the other. + Refer to https://en.wikipedia.org/wiki/Edit_distance + Accumulate edit distance sum and sequence number from mini-batches and compute the average edit_distance and instance error of all batches. Args: name: the metrics name - Example: - edit_distance_metrics = fluid.layers.edit_distance(input, label) - distance_evaluator = fluid.metrics.EditDistance() - for epoch in PASS_NUM: - distance_evaluator.reset() - for data in batches: - loss = exe.run(fetch_list=[cost] + list(edit_distance_metrics)) - distance_evaluator.update(*edit_distance_metrics) - distance, instance_error = distance_evaluator.eval() + Examples: + .. code-block:: python + + distances, seq_num = fluid.layers.edit_distance(input, label) + distance_evaluator = fluid.metrics.EditDistance() + for epoch in PASS_NUM: + distance_evaluator.reset() + for data in batches: + loss = exe.run(fetch_list=[cost] + list(edit_distance_metrics)) + distance_evaluator.update(distances, seq_num) + distance, instance_error = distance_evaluator.eval() In the above example: 'distance' is the average of the edit distance in a pass. @@ -264,16 +477,38 @@ class EditDistance(MetricBase): class DetectionMAP(MetricBase): """ Calculate the detection mean average precision (mAP). - - TODO (Dang Qingqing): update the following doc. - The general steps are as follows: - 1. calculate the true positive and false positive according to the input - of detection and labels. - 2. calculate mAP value, support two versions: '11 point' and 'integral'. - + mAP is the metric to measure the accuracy of object detectors + like Faster R-CNN, SSD, etc. + It is the average of the maximum precisions at different recall values. Please get more information from the following articles: https://sanchom.wordpress.com/tag/average-precision/ + https://arxiv.org/abs/1512.02325 + + The general steps are as follows: + + 1. calculate the true positive and false positive according to the input + of detection and labels. + 2. calculate mAP value, support two versions: '11 point' and 'integral'. + + Examples: + .. code-block:: python + + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + batch_map = layers.detection_map( + input, + label, + class_num, + background_label, + overlap_threshold=overlap_threshold, + evaluate_difficult=evaluate_difficult, + ap_version=ap_version) + metric = fluid.metrics.DetectionMAP() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, batch_map]) + batch_size = data[0] + metric.update(value=batch_map, weight=batch_size) + numpy_map = metric.eval() """ def __init__(self, name=None): @@ -302,17 +537,18 @@ class DetectionMAP(MetricBase): class Auc(MetricBase): """ - Auc Metrics which adapts to binary classification. - Need to note that auc metrics compute the value via Python natively. + Auc metric adapts to the binary classification. + Refer to https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve + Need to note that auc metric compute the value via Python natively. If you concern the speed, please use the fluid.layers.auc instead. The `auc` function creates four local variables, `true_positives`, - `true_negatives`, `false_positives` and `false_negatives` that are used to - compute the AUC. To discretize the AUC curve, a linearly spaced set of - thresholds is used to compute pairs of recall and precision values. The area - under the ROC-curve is therefore computed using the height of the recall - values by the false positive rate, while the area under the PR-curve is the - computed using the height of the precision values by the recall. + `true_negatives`, `false_positives` and `false_negatives` that are used to + compute the AUC. To discretize the AUC curve, a linearly spaced set of + thresholds is used to compute pairs of recall and precision values. The area + under the ROC-curve is therefore computed using the height of the recall + values by the false positive rate, while the area under the PR-curve is the + computed using the height of the precision values by the recall. Args: name: metric name @@ -322,22 +558,32 @@ class Auc(MetricBase): curve. "NOTE: only implement the ROC curve type via Python now." + + Examples: + .. code-block:: python + + pred = fluid.layers.fc(input=data, size=1000, act="tanh") + metric = fluid.metrics.Auc() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds, labels) + numpy_auc = metric.eval() """ def __init__(self, name, curve='ROC', num_thresholds=200): - super(MetricBase, self).__init__(name, curve, num_thresholds) + super(Auc, self).__init__(name=name) self._curve = curve self._num_thresholds = num_thresholds self._epsilon = 1e-6 - self.tp_list = np.ndarray((num_thresholds, )) - self.fn_list = np.ndarray((num_thresholds, )) - self.tn_list = np.ndarray((num_thresholds, )) - self.fp_list = np.ndarray((num_thresholds, )) + self.tp_list = np.zeros((num_thresholds, )) + self.fn_list = np.zeros((num_thresholds, )) + self.tn_list = np.zeros((num_thresholds, )) + self.fp_list = np.zeros((num_thresholds, )) - def update(self, labels, predictions, axis=1): + def update(self, preds, labels): if not _is_numpy_(labels): raise ValueError("The 'labels' must be a numpy ndarray.") - if not _is_numpy_(predictions): + if not _is_numpy_(preds): raise ValueError("The 'predictions' must be a numpy ndarray.") kepsilon = 1e-7 # to account for floating point imprecisions @@ -350,12 +596,12 @@ class Auc(MetricBase): tp, fn, tn, fp = 0, 0, 0, 0 for i, lbl in enumerate(labels): if lbl: - if predictions[i, 0] >= thresh: + if preds[i, 1] >= thresh: tp += 1 else: fn += 1 else: - if predictions[i, 0] >= thresh: + if preds[i, 1] >= thresh: fp += 1 else: tn += 1 diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index bbedf6fde0..9b3f2aebee 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -26,16 +26,87 @@ def simple_img_conv_pool(input, filter_size, pool_size, pool_stride, - act, - param_attr=None, + pool_padding=0, pool_type='max', + global_pooling=False, + conv_stride=1, + conv_padding=0, + conv_dilation=1, + conv_groups=1, + param_attr=None, + bias_attr=None, + act=None, use_cudnn=True, use_mkldnn=False): + """ + The simple_img_conv_pool is composed with one Convolution2d and one Pool2d. + + Args: + input (Variable): The input image with [N, C, H, W] format. + num_filters(int): The number of filter. It is as same as the output + feature channel. + filter_size (int|list|tuple): The filter size. If filter_size is a list or + tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, + the filter_size_H = filter_size_W = filter_size. + pool_size (int|list|tuple): The pooling size of Pool2d layer. If pool_size + is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W). + Otherwise, the pool_size_H = pool_size_W = pool_size. + pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride + is a list or tuple, it must contain two integers, (pooling_stride_H, pooling_stride_W). + Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride. + pool_padding (int|list|tuple): The padding of Pool2d layer. If pool_padding is a list or + tuple, it must contain two integers, (pool_padding_H, pool_padding_W). + Otherwise, the pool_padding_H = pool_padding_W = pool_padding. Default 0. + pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for + average-pooling. Default :math:`max`. + global_pooling (bool): Whether to use the global pooling. If global_pooling = true, + pool_size and pool_padding while be ignored. Default False + conv_stride (int|list|tuple): The stride size of the Conv2d Layer. If stride is a + list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise, + the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1. + conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is + a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W). + Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0. + conv_dilation (int|list|tuple): The dilation size of the Conv2d Layer. If dilation is + a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W). + Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1. + conv_groups (int): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. Default: groups=1 + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None + act (str): Activation type for Conv2d. Default: None + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled + with mkldnn library. Default: False + + Return: + Variable: The result of input after Convolution2d and Pool2d. + + Examples: + .. code-block:: python + + img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') + conv_pool = fluid.nets.simple_img_conv_pool(input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + """ conv_out = layers.conv2d( input=input, num_filters=num_filters, filter_size=filter_size, + stride=conv_stride, + padding=conv_padding, + dilation=conv_dilation, + groups=conv_groups, param_attr=param_attr, + bias_attr=bias_attr, act=act, use_cudnn=use_cudnn, use_mkldnn=use_mkldnn) @@ -45,6 +116,8 @@ def simple_img_conv_pool(input, pool_size=pool_size, pool_type=pool_type, pool_stride=pool_stride, + pool_padding=pool_padding, + global_pooling=global_pooling, use_cudnn=use_cudnn, use_mkldnn=use_mkldnn) return pool_out @@ -60,11 +133,65 @@ def img_conv_group(input, conv_with_batchnorm=False, conv_batchnorm_drop_rate=0.0, pool_stride=1, - pool_type=None, + pool_type="max", use_cudnn=True, use_mkldnn=False): """ - Image Convolution Group, Used for vgg net. + The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut, + and Pool2d. According to the input arguments, img_conv_group will do serials of + computation for Input using Convolution2d, BatchNorm, DropOut, and pass the last + result to Pool2d. + + Args: + input (Variable): The input image with [N, C, H, W] format. + conv_num_filter(list|tuple): Indicates the numbers of filter of this group. + pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size + is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W). + Otherwise, the pool_size_H = pool_size_W = pool_size. + conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is + a list or tuple, its length must be equal to the length of conv_num_filter. + Otherwise the conv_padding of all Conv2d Layers are the same. Default 1. + conv_filter_size (int|list|tuple): The filter size. If filter_size is a list or + tuple, its length must be equal to the length of conv_num_filter. + Otherwise the conv_filter_size of all Conv2d Layers are the same. Default 3. + conv_act (str): Activation type for Conv2d Layer that is not followed by BatchNorm. + Default: None. + param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None + conv_with_batchnorm (bool|list): Indicates whether to use BatchNorm after Conv2d Layer. + If conv_with_batchnorm is a list, its length must be equal to the length of + conv_num_filter. Otherwise, conv_with_batchnorm indicates whether all the + Conv2d Layer follows a BatchNorm. Default False. + conv_batchnorm_drop_rate (float|list): Indicates the drop_rate of Dropout Layer + after BatchNorm. If conv_batchnorm_drop_rate is a list, its length must be + equal to the length of conv_num_filter. Otherwise, drop_rate of all Dropout + Layers is conv_batchnorm_drop_rate. Default 0.0. + pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride + is a list or tuple, it must contain two integers, (pooling_stride_H, + pooling_stride_W). Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride. + Default 1. + pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for + average-pooling. Default :math:`max`. + use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn + library is installed. Default: True + use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled + with mkldnn library. Default: False + + Return: + Variable: The final result after serial computation using Convolution2d, + BatchNorm, DropOut, and Pool2d. + + Examples: + .. code-block:: python + + img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32') + conv_pool = fluid.nets.img_conv_group(input=img, + num_channels=3, + conv_padding=1, + conv_num_filter=[3, 3], + conv_filter_size=3, + conv_act="relu", + pool_size=2, + pool_stride=2) """ tmp = input assert isinstance(conv_num_filter, list) or \ @@ -74,6 +201,7 @@ def img_conv_group(input, if not hasattr(obj, '__len__'): return [obj] * len(conv_num_filter) else: + assert len(obj) == len(conv_num_filter) return obj conv_padding = __extend_list__(conv_padding) @@ -119,6 +247,39 @@ def sequence_conv_pool(input, param_attr=None, act="sigmoid", pool_type="max"): + """ + The sequence_conv_pool is composed with Sequence Convolution and Pooling. + + Args: + input (Variable): The input of sequence_conv, which supports variable-time + length input sequence. The underlying of input is a matrix with shape + (T, N), where T is the total time steps in this mini-batch and N is + the input_hidden_size + num_filters(int): The number of filter. + filter_size (int): The filter size. + param_attr (ParamAttr): The parameters to the Sequence_conv Layer. Default: None. + act (str): Activation type for Sequence_conv Layer. Default: "sigmoid". + pool_type (str): Pooling type can be :math:`max` for max-pooling, :math:`average` for + average-pooling, :math:`sum` for sum-pooling, :math:`sqrt` for sqrt-pooling. + Default :math:`max`. + + Return: + Variable: The final result after Sequence Convolution and Pooling. + + Examples: + .. code-block:: python + + input_dim = len(word_dict) + emb_dim = 128 + hid_dim = 512 + data = fluid.layers.data( ame="words", shape=[1], dtype="int64", lod_level=1) + emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True) + seq_conv = fluid.nets.sequence_conv_pool(input=emb, + num_filters=hid_dim, + filter_size=3, + act="tanh", + pool_type="sqrt") + """ conv_out = layers.sequence_conv( input=input, num_filters=num_filters, @@ -132,9 +293,9 @@ def sequence_conv_pool(input, def glu(input, dim=-1): """ - The gated linear unit composed by split, sigmoid activation and elementwise - multiplication. Specifically, Split the input into two equal sized parts - :math:`a` and :math:`b` along the given dimension and then compute as + The Gated Linear Units(GLU) composed by split, sigmoid activation and element-wise + multiplication. Specifically, Split the input into two equal sized parts, + :math:`a` and :math:`b`, along the given dimension and then compute as following: .. math:: @@ -147,16 +308,16 @@ def glu(input, dim=-1): Args: input (Variable): The input variable which is a Tensor or LoDTensor. dim (int): The dimension along which to split. If :math:`dim < 0`, the - dimension to split along is :math:`rank(input) + dim`. + dimension to split along is :math:`rank(input) + dim`. Default -1. Returns: - Variable: The Tensor variable with half the size of input. + Variable: Variable with half the size of input. Examples: .. code-block:: python - # x is a Tensor variable with shape [3, 6, 9] - fluid.nets.glu(input=x, dim=1) # shape of output: [3, 3, 9] + data = fluid.layers.data(name="words", shape=[3, 6, 9], dtype="float32") + output = fluid.nets.glu(input=data, dim=1) # shape of output: [3, 3, 9] """ a, b = layers.split(input, num_or_sections=2, dim=dim) @@ -189,40 +350,48 @@ def scaled_dot_product_attention(queries, `_. Args: - queries (Variable): The input variable which should be a 3-D Tensor. keys (Variable): The input variable which should be a 3-D Tensor. values (Variable): The input variable which should be a 3-D Tensor. num_heads (int): Head number to compute the scaled dot product - attention. Default value is 1. + attention. Default: 1. dropout_rate (float): The dropout rate to drop the attention weight. - Default value is 0. + Default: 0.0. Returns: - - Variable: A 3-D Tensor computed by multi-head scaled dot product \ - attention. + Variable: A 3-D Tensor computed by multi-head scaled dot product\ + attention. Raises: - ValueError: If input queries, keys, values are not 3-D Tensors. - NOTE: + NOTES: 1. When num_heads > 1, three linear projections are learned respectively - to map input queries, keys and values into queries', keys' and values'. - queries', keys' and values' have the same shapes with queries, keys - and values. - - 1. When num_heads == 1, scaled_dot_product_attention has no learnable - parameters. + to map input queries, keys and values into queries', keys' and values'. + queries', keys' and values' have the same shapes with queries, keys + and values. + 2. When num_heads == 1, scaled_dot_product_attention has no learnable + parameters. Examples: .. code-block:: python - # Suppose q, k, v are Tensors with the following shape: - # q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10] - - contexts = fluid.nets.scaled_dot_product_attention(q, k, v) + queries = fluid.layers.data(name="queries", + shape=[3, 5, 9], + dtype="float32", + append_batch_size=False) + queries.stop_gradient = False + keys = fluid.layers.data(name="keys", + shape=[3, 6, 9], + dtype="float32", + append_batch_size=False) + keys.stop_gradient = False + values = fluid.layers.data(name="values", + shape=[3, 6, 10], + dtype="float32", + append_batch_size=False) + values.stop_gradient = False + contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values) contexts.shape # [3, 5, 10] """ if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3): diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 115362c6bf..75ee40fa9c 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -13,7 +13,7 @@ # limitations under the License. import re from collections import defaultdict -from paddle.fluid.framework import Program +from paddle.fluid.framework import Program, Variable import framework import layers from backward import append_backward @@ -26,10 +26,10 @@ from clip import append_gradient_clip_ops, error_clip_callback from contextlib import contextmanager __all__ = [ - 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', + 'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'DecayedAdagrad', 'Ftrl', 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', 'AdamaxOptimizer', 'DecayedAdagradOptimizer', 'RMSPropOptimizer', - 'Adadelta', 'ModelAverage', 'Optimizer' + 'FtrlOptimizer', 'Adadelta', 'ModelAverage', 'Optimizer', 'RMSPropOptimizer' ] @@ -41,7 +41,10 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self, learning_rate, regularization=None): + def __init__(self, + learning_rate, + regularization=None, + LARS_weight_decay=0.0): if not isinstance(learning_rate, float) and \ not isinstance(learning_rate, framework.Variable): raise TypeError("learning rate should be float or Variable") @@ -61,6 +64,7 @@ class Optimizer(object): # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} self._accumulators = defaultdict(lambda: dict()) self.helper = None + self._LARS_weight_decay = LARS_weight_decay def _create_global_learning_rate(self): lr = self.global_learning_rate() @@ -100,10 +104,15 @@ class Optimizer(object): # create learning rate variable for every parameter param = param_and_grad[0] param_lr = param.optimize_attr['learning_rate'] - if param_lr == 1.0: - return self.global_learning_rate() + if type(param_lr) == Variable: + # param learning rate has been updated (LARS) + print("returns updated param lr ", param_lr) + return param_lr else: - return self.global_learning_rate() * param_lr + if param_lr == 1.0: + return self.global_learning_rate() + else: + return self.global_learning_rate() * param_lr def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -183,15 +192,15 @@ class Optimizer(object): """Add optimization operators to update gradients to variables. Args: - loss: the target that this optimization is for. - parameters_and_grads: a list of (variable, gradient) pair to update. + loss(Variable): the target that this optimization is for. + parameters_and_grads(list(tuple(Variable, Variable))): + a list of (variable, gradient) pair to update. Returns: return_op_list: a list of operators that will complete one step of optimization. This will include parameter update ops, global step update ops and any other custom ops required by subclasses to manage their internal state. - :param startup_program: """ # This is a default implementation of create_optimization_pass that # can be shared by most optimizers. This implementation assumes that @@ -210,6 +219,10 @@ class Optimizer(object): self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) self._create_global_learning_rate() + if self._LARS_weight_decay > 0.0: + layers.append_LARS(parameters_and_grads, + self.global_learning_rate(), + self._LARS_weight_decay) optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -255,7 +268,22 @@ class Optimizer(object): class SGDOptimizer(Optimizer): - """ Simple SGD optimizer without any state. + """ + Optimizer of the stochastic gradient descent algorithm. + + .. math:: + + param\_out = param - learning\_rate * grad + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + + Examples: + .. code-block:: python + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.2) + sgd_optimizer.minimize(cost) """ def __init__(self, learning_rate, **kwargs): @@ -281,7 +309,37 @@ class SGDOptimizer(Optimizer): class MomentumOptimizer(Optimizer): - """Simple Momentum optimizer with velocity state + """ + + Simple Momentum optimizer with velocity state + + This optimizer has a flag for Nestrov Momentum. + + The update equations are as follows: + + .. math:: + + & velocity = mu * velocity + gradient + + & if (use\_nesterov): + + &\quad param = param - gradient * learning\_rate + mu * velocity * learning\_rate + + & else: + + &\quad param = param - learning\_rate * velocity + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + momentum (float): momentum factor + use_nesterov (bool): enables Nesterov momentum + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1) + optimizer.minimize(cost) """ _velocity_acc_str = "velocity" @@ -325,7 +383,32 @@ class MomentumOptimizer(Optimizer): class AdagradOptimizer(Optimizer): - """Simple Adagrad optimizer with moment state + """ + **Adaptive Gradient Algorithm (Adagrad)** + + The update is done as follows: + + .. math:: + + moment\_out &= moment + grad * grad + + param\_out &= param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon} + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + does not have the epsilon attribute. It is added here in our implementation + as also proposed here: http://cs231n.github.io/neural-networks-3/#ada + for numerical stability to avoid the division by zero error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + epsilon (float): a small float value for numerical stability. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Adagrad(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" @@ -366,7 +449,40 @@ class AdagradOptimizer(Optimizer): class AdamOptimizer(Optimizer): - """Implements the Adam Optimizer + """ + This implements the Adam optimizer from Section 2 of the Adam + paper : https://arxiv.org/abs/1412.6980. + Adam is a first-order gradient-based optimization method based on + adaptive estimates of lower-order moments. + + Adam updates: + + .. math:: + + t & = t + 1 + + moment\_1\_out & = {\\beta}_1 * moment\_1 + (1 - {\\beta}_1) * grad + + moment\_2\_out & = {\\beta}_2 * moment\_2 + (1 - {\\beta}_2) * grad * grad + + learning\_rate & = learning\_rate * \\ + \\frac{\sqrt{1 - {\\beta}_2^t}}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_1}{\sqrt{moment\_2} + \epsilon} + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + beta1 (float): The exponential decay rate for the 1st moment estimates. + beta2 (float): The exponential decay rate for the 2nd moment estimates. + epsilon (float): a small float value for numerical stability. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Adam(learning_rate=0.2) + optimizer.minimize(cost) + """ _moment1_acc_str = "moment1" _moment2_acc_str = "moment2" @@ -471,7 +587,42 @@ class AdamOptimizer(Optimizer): class AdamaxOptimizer(Optimizer): - """Implements the Adamax Optimizer + """ + We implement the Adamax optimizer from Section 7 of the Adam + paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the + Adam algorithm based on the infinity norm. + + Adamax updates: + + .. math:: + + t & = t + 1 + + moment\_out & = {\\beta}_1 * moment + (1 - {\\beta}_1) * grad + + inf\_norm\_out & = max({\\beta}_2 * inf\_norm + \epsilon, |grad|) + + learning\_rate & = \\frac{learning\_rate}{1 - {\\beta}_1^t} + + param\_out & = param - learning\_rate * \\frac{moment\_out}{inf\_norm\_out} + + + The original paper does not have an epsilon attribute. + However, it is added here for numerical stability to prevent the + division by 0 error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + beta1 (float): The exponential decay rate for the 1st moment estimates. + beta2 (float): The exponential decay rate for the 2nd moment estimates. + epsilon (float): a small float value for numerical stability. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Adamax(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" _inf_norm_acc_str = "inf_norm" @@ -555,7 +706,34 @@ class AdamaxOptimizer(Optimizer): class DecayedAdagradOptimizer(Optimizer): - """Simple Decayed Adagrad optimizer with moment state + """ + **Decayed Adagrad Optimizer** + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + + The update is done as follows: + + .. math:: + + moment\_out & = decay * moment + (1 - decay) * grad * grad + + param\_out & = param - \\frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon} + + The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) + does not have an epsilon attribute. It is added here for numerical + stability to avoid the division by zero error. + + Args: + learning_rate (float|Variable): the learning rate used to update parameters. \ + Can be a float value or a Variable with one float value as data element. + decay (float): decay rate. + epsilon (float): a small float value for numerical stability. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.DecayedAdagrad(learning_rate=0.2) + optimizer.minimize(cost) """ _moment_acc_str = "moment" @@ -601,6 +779,7 @@ class DecayedAdagradOptimizer(Optimizer): class AdadeltaOptimizer(Optimizer): """ **Adadelta Optimizer** + Simple Adadelta optimizer with average squared grad state and average squared update state. The details of adadelta please refer to this @@ -615,7 +794,7 @@ class AdadeltaOptimizer(Optimizer): E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\\_rate)^2 Args: - learning_rate(float): global leraning rate + learning_rate(float): global learning rate rho(float): rho in equation epsilon(float): epsilon in equation @@ -690,37 +869,37 @@ class RMSPropOptimizer(Optimizer): .. math:: - r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 w & = w - \\frac{\\eta} {\\sqrt{r(w,t) + \\epsilon}} \\nabla Q_{i}(w) The first equation calculates moving average of the squared gradient for - each weight. Then dividing the gradient by :math: `sqrt{v(w,t)}`. + each weight. Then dividing the gradient by :math:`sqrt{v(w,t)}`. In some cases, adding a momentum term :math: `\\beta` is beneficial. In our implementation, Nesterov momentum is used: .. math:: - r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 \\\\ + r(w, t) & = \\rho r(w, t-1) + (1 - \\rho)(\\nabla Q_{i}(w))^2 v(w, t) & = \\beta v(w, t-1) + \\frac{\\eta} {\\sqrt{v(w,t) + \\epsilon}} \\nabla Q_{i}(w) w & = w - v(w, t) - where, :math: `\\rho` is a hyperparameter and typical values are 0.9, 0.95 + where, :math:`\\rho` is a hyperparameter and typical values are 0.9, 0.95 and so on. :math: `beta` is the momentum term. :math: `\\epsilon` is a smoothing term to avoid division by zero, usually set somewhere in range from 1e-4 to 1e-8. Args: - learning_rate(float): global leraning rate. + learning_rate(float): global learning rate. rho(float): rho is :math: `\\rho` in equation, set 0.95 by default. epsilon(float): :math: `\\epsilon` in equation is smoothing term to avoid division by zero, set 1e-6 by default. - momentum(float): :math: `\\beta` in equation is the momentum term, + momentum(float): :math:`\\beta` in equation is the momentum term, set 0.0 by default. Raises: @@ -797,6 +976,113 @@ class RMSPropOptimizer(Optimizer): return rmsprop_op +class FtrlOptimizer(Optimizer): + """ + FTRL (Follow The Regularized Leader) Optimizer. + + The paper that proposed Follow The Regularized Leader (FTRL): + (https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf) + + .. math:: + + &new\_accum = squared\_accum + grad^2 + + &if (lr\_power == -0.5): + + &\quad linear\_accum += grad - \\frac{\\sqrt{new\_accum} - \\sqrt{squared\_accum}}{learning\_rate * param} + + &else: + + &\quad linear\_accum += grad - \\frac{new\_accum^{-lr\_power} - accum^{-lr\_power}}{learning\_rate * param} + + + &x = l1 * sign(linear\_accum) - linear\_accum + + &if (lr\_power == -0.5): + + &\quad y = \\frac{\\sqrt{new\_accum}}{learning\_rate} + (2 * l2) + + &\quad pre\_shrink = \\frac{x}{y} + + &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) + + &else: + + &\quad y = \\frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2) + + &\quad pre\_shrink = \\frac{x}{y} + + &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0) + + &squared\_accum += grad^2 + + Args: + learning_rate (float|Variable): global learning rate. + l1 (float): + l2 (float): + lr_power (float): + + Raises: + ValueError: If learning_rate, rho, epsilon, momentum are None. + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Ftrl(0.0001) + _, params_grads = optimizer.minimize(cost) + """ + + _squared_acc_str = "squared" + _linear_acc_str = "linear" + + def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs): + super(FtrlOptimizer, self).__init__( + learning_rate=learning_rate, **kwargs) + if learning_rate is None: + raise ValueError("learning_rate is not set.") + + self.type = "ftrl" + self._l1 = l1 + self._l2 = l2 + self._lr_power = lr_power + + def _create_accumulators(self, block, parameters): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + for p in parameters: + self._add_accumulator(self._squared_acc_str, p) + self._add_accumulator(self._linear_acc_str, p) + + def _append_optimize_op(self, block, param_and_grad): + if not isinstance(block, framework.Block): + raise TypeError("block is not instance of framework.Block.") + + squared_acc = self._get_accumulator(self._squared_acc_str, + param_and_grad[0]) + linear_acc = self._get_accumulator(self._linear_acc_str, + param_and_grad[0]) + ftrl_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "SquaredAccumulator": squared_acc, + "LinearAccumulator": linear_acc, + "LearningRate": self._create_param_lr(param_and_grad), + }, + outputs={ + "ParamOut": param_and_grad[0], + "SquaredAccumOut": squared_acc, + "LinearAccumOut": linear_acc + }, + attrs={"l1": self._l1, + "l2": self._l1, + "lr_power": self._lr_power}) + + return ftrl_op + + # We short the class name, since users will use the optimizer with the package # name. The sample code: # @@ -813,6 +1099,7 @@ Adamax = AdamaxOptimizer DecayedAdagrad = DecayedAdagradOptimizer Adadelta = AdadeltaOptimizer RMSProp = RMSPropOptimizer +Ftrl = FtrlOptimizer class ModelAverage(Optimizer): @@ -826,15 +1113,16 @@ class ModelAverage(Optimizer): Args: average_window_rate: The rate of average window. - params_grads: A list of parameter-grad variable pairs. min_average_window: The minimum size of average window. max_average_window: The maximum size of average window. Examples: - ... + + .. code-block:: python + optimizer = fluid.optimizer.Momentum() - _, params_grads = optimizer.minimize(cost) - model_average = fluid.optimizer.ModelAverage(params_grads, 0.15, + optimizer.minimize(cost) + model_average = fluid.optimizer.ModelAverage(0.15, min_average_window=10000, max_average_window=20000) for pass_id in range(args.pass_num): @@ -848,7 +1136,6 @@ class ModelAverage(Optimizer): def __init__(self, average_window_rate, - params_grads=None, min_average_window=10000, max_average_window=10000, **kwargs): @@ -857,21 +1144,16 @@ class ModelAverage(Optimizer): self.min_average_window = min_average_window self.max_average_window = max_average_window - self.params_grads = [] if params_grads is None else params_grads - params = {} - for param, grad in self.params_grads: - if param.do_model_average != False: - params[param.name] = (param, grad) + self.params_grads = [] for param in framework.default_main_program().global_block( ).all_parameters(): - if param.name not in params and param.do_model_average != False: + if param.do_model_average != False: grad = param.block.create_var( name=unique_name.generate(".".join([param.name, 'tmp'])), dtype=param.dtype, persistable=False, stop_gradient=True) - params[param.name] = (param, grad) - self.params_grads = params.values() + self.params_grads.append((param, grad)) for param, grad in self.params_grads: self._append_average_accumulate_op(param) diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 0fdc9a0352..6baf648198 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -27,6 +27,40 @@ BuildStrategy = core.ParallelExecutor.BuildStrategy class ParallelExecutor(object): + """ + ParallelExecutor can run program in parallel. + + Args: + use_cuda (bool): Whether to use CUDA or not. + loss_name (str): The loss name must set in training. Default None. + main_program (Program): The program that need to run, if not provided, + then default_main_program will be used. Default None. + share_vars_from(ParallelExecutor): If provied, it will share variables + from the specified ParallelExecutor. Default None. + num_trainers(int): If greater than 1, NCCL will be initialized with + multiple rank of nodes, each node should have same number of GPUs. + Distributed training will be enabled then. Default 1. + trainer_id(int: Must use together with num_trainers. trainer_id is the + "rank" of current node starts from 0. Default 0. + + Returns: + ParallelExecutor: The initialized ParallelExecutor object. + + Raises: + TypeError: If share_vars_from is provided, but not ParallelExecutor object. + + Examples: + .. code-block:: python + + train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name) + test_exe = fluid.ParallelExecutor(use_cuda=True, + main_program=test_program, + share_vars_from=train_exe) + + train_loss, = train_exe.run([loss.name], feed=feed_dict) + test_loss, = test_exe.run([loss.name], feed=feed_dict) + """ + def __init__(self, use_cuda, loss_name=None, @@ -37,42 +71,6 @@ class ParallelExecutor(object): num_trainers=1, trainer_id=0, **kwargs): - """ - ParallelExecutor can run program in parallel. - - Args: - use_cuda(bool): Whether to use CUDA or not. - loss_name(str, default None): The loss name must set in training. - main_program(Program, default None): The program that need to run, - if not provided, then default_main_program will be used. - share_vars_from(ParallelExecutor, default None): If provied, - it will share variables from the specified ParallelExecutor. - num_trainers(int, default 1): If greater than 1, NCCL will be - initialized with multpile rank of nodes, each node should have - same number of GPUs. Distributed training will be enabled then. - trainer_id(int, default 0): Must use together with num_trainers. - trainer_id is the "rank" of current node starts from 0. - - Returns: - A ParallelExecutor object. - - Raises: - TypeError: If share_vars_from is provided, but not ParallelExecutor - object. - - Examples: - .. code-block:: python - - train_exe = fluid.ParallelExecutor( - use_cuda=True, loss_name=loss.name) - test_exe = fluid.ParallelExecutor( - use_cuda=True, - main_program=test_program, - share_vars_from=train_exe) - - train_loss, = train_exe.run([loss.name], feed=feed_dict) - test_loss, = test_exe.run([loss.name], feed=feed_dict) - """ if len(kwargs) != 0: err_msg = "" for key in kwargs: @@ -131,10 +129,16 @@ class ParallelExecutor(object): main = main_program main = main if main else framework.default_main_program() scope = executor.global_scope() + # FIXME(Yancey1989): it's a temporary approach to determinate the distribute + # train program, call self.bcast_param() at the end of each mini-batch. + self.is_dist = True if "recv" in [ + op.type for op in main.global_block().ops + ] else False if share_vars_from and not isinstance(share_vars_from, ParallelExecutor): raise TypeError("share_vars_from must be ParallelExecutor.") + local_scopes = share_vars_from.executor.local_scopes( ) if share_vars_from else [] @@ -156,7 +160,7 @@ class ParallelExecutor(object): build_strategy, num_trainers, trainer_id) self.scope = scope - def run(self, fetch_list, feed=None, feed_dict=None): + def run(self, fetch_list, feed=None, feed_dict=None, return_numpy=True): """ Run a parallel executor with fetch_list. @@ -166,12 +170,14 @@ class ParallelExecutor(object): element in the list will be copied to each device directly. For example, if the feed is a dict: + >>> exe = ParallelExecutor() >>> # the image will be splitted into devices. If there is two devices >>> # each device will process an image with shape (24, 1, 28, 28) >>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))}) For example, if the feed is a list: + >>> exe = ParallelExecutor() >>> # each device will process each element in the list. >>> # the 1st device will process an image with shape (48, 1, 28, 28) @@ -182,18 +188,42 @@ class ParallelExecutor(object): >>> {"image": numpy.random.random(size=(32, 1, 28, 28))}, >>> ]) - Args: fetch_list(list): The fetched variable names feed(list|dict|None): The feed variables. If the feed is a dict, tensors in that dict will be splitted into each devices. If the feed is a list, each element of the list will be copied - to each device. + to each device. Default None. feed_dict: Alias for feed parameter, for backward compatibility. - This parameter is deprecated. + This parameter has been deprecated. Default None. + return_numpy(bool): Whether converts the fetched tensor to numpy. + Default: True. + + Returns: + List: The fetched result list. + + Raises: + ValueError: If the feed is a list, but its length is not equal the + length of active places, or its element's is not dict. + + NOTES: + 1. If the feed's type is dict, the number of data that feeds to + ParallelExecutor must be bigger than active places. Otherwise, + it will throw exception from C++ side. Special attention should be + paid to check whether the last batch of the dataset is bigger + than active places. + 2. If active places are more than one, the fetch results for each + variable is a list, and each element of this list is the variable of + respective active place. - Returns: fetched result list. + Examples: + .. code-block:: python + pe = fluid.ParallelExecutor(use_cuda=use_cuda, + loss_name=avg_cost.name, + main_program=fluid.default_main_program()) + loss = pe.run(feed=feeder.feed(cur_batch), + fetch_list=[avg_cost.name])) """ if feed is None and feed_dict is not None: feed = feed_dict @@ -238,9 +268,20 @@ class ParallelExecutor(object): fetch_var_name = '@FETCHED_VAR_NAME@' self.executor.run(fetch_list, fetch_var_name) arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array() + + if self.is_dist: + self.bcast_params() + + if return_numpy: + return executor.as_numpy(arr) + return [arr[i] for i in range(len(arr))] def bcast_params(self): + """ + Broadcast the parameters to other devices. It is used during + distributed training. + """ self.executor.bcast_params(set(self.persistable_vars)) @property diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index 1c6970441b..0a42b9fca8 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -22,6 +22,35 @@ __all__ = [ class ParamAttr(object): + """ + Parameter attributes object. To fine-tuning network training process, user + can set parameter's attributes to control training details. Such as learning rate, + regularization, trainable, do_model_average and the method to initialize param. + + + Args: + name(str): The parameter's name. Default None. + initializer(Initializer): The method to initial this parameter. Default None. + learning_rate(float): The parameter's learning rate. The learning rate when + optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`. + Default 1.0. + regularizer(WeightDecayRegularizer): Regularization factor. Default None. + trainable(bool): Whether this parameter is trainable. Default True. + gradient_clip(BaseGradientClipAttr): The method to clip this parameter's + gradient. Default None. + do_model_average(bool): Whether this parameter should do model average. + Default False. + + Examples: + .. code-block:: python + + w_param_attrs = fluid.ParamAttr(name="fc_weight", + learning_rate=0.5, + regularizer=fluid.L2Decay(1.0), + trainable=True) + y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs) + """ + def __init__(self, name=None, initializer=None, @@ -29,7 +58,7 @@ class ParamAttr(object): regularizer=None, trainable=True, gradient_clip=None, - do_model_average=None): + do_model_average=False): self.name = name self.initializer = initializer self.learning_rate = learning_rate @@ -39,6 +68,16 @@ class ParamAttr(object): self.model_average = do_model_average def set_default_initializer(self, initializer): + """ + Set the default initializer, the initializer should be Constant, + Uniform, Normal, Xavier, MSRA. + + Args: + initializer(Initializer): the initializer to set. + + Returns: + None + """ if initializer is None: if self.initializer is None: raise ValueError("ParamAttr.initializer is not set") @@ -50,13 +89,45 @@ class ParamAttr(object): self.initializer = initializer def set_default_param_initializer(self): + """ + Set the default initializer for the parameter with Xavier. + + Args: + None. + + Returns: + None. + """ self.set_default_initializer(Xavier()) def set_default_bias_initializer(self): + """ + Set the default initializer for the bias with Constant(0.0). + + Args: + None. + + Returns: + None. + """ self.set_default_initializer(Constant(0.0)) @staticmethod def to_attr(arg): + """ + Create ParamAttr[s]. + + Args: + arg: Arguments to initialize ParamAttr[s]. arg's type can be + str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr, + bool, ParamAttr, or a list of above type. + + Returns: + ParamAttr[s]: ParamAttr[s] initialized with arg. + + Raises: + arg can not initialize a ParamAttr. + """ if arg is None: return ParamAttr() elif isinstance(arg, list) or isinstance(arg, tuple): @@ -75,6 +146,15 @@ class ParamAttr(object): raise TypeError("{0} cast to ParamAttr".format(type(arg))) def to_kwargs(self, with_initializer=False): + """ + Returns the attributes of this parameter. + + Args: + with_initializer(bool): Whether to add initializer attr. + + Returns: + Parameter attributes(map): The attributes of this parameter. + """ kwargs = { 'name': self.name, 'optimize_attr': { @@ -92,9 +172,27 @@ class ParamAttr(object): class WeightNormParamAttr(ParamAttr): """ - Used for weight normalization. Any field in ParamAttr can also be set here. - Besides, an extra field dim can be set to indicate the dimension except - which to normalize. + Used for weight Norm. Weight Norm is a reparameterization of the weight vectors + in a neural network that decouples the length of those weight vectors from + their direction. Weight Norm has been implemented as discussed in this + paper: `Weight Normalization: A Simple Reparameterization to Accelerate + Training of Deep Neural Networks + `_. + + Args: + dim(list): The parameter's name. Default None. + kwargs: Any field in ParamAttr. Default None. + + Examples: + .. code-block:: python + + data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32") + fc = fluid.layers.fc(input=data, + size=1000, + param_attr=WeightNormParamAttr( + dim=None, + name='weight_norm_param')) + """ # List to record the parameters reparameterized by weight normalization. # If these parameters are treated as Variable rather than Parameter, diff --git a/python/paddle/fluid/profiler.py b/python/paddle/fluid/profiler.py index e2bd1d4c9a..6a321ae024 100644 --- a/python/paddle/fluid/profiler.py +++ b/python/paddle/fluid/profiler.py @@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None): counters/options for profiling by `config` argument. The default config is ['gpustarttimestamp', 'gpustarttimestamp', 'gridsize3d', 'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace']. + Then users can use NVIDIA Visual Profiler + (https://developer.nvidia.com/nvidia-visual-profiler) tools to load this + this output file to visualize results. Args: output_file (string) : The output file name, the result will be @@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None): Comma separated values format. It should be 'kvp' or 'csv'. config (list of string) : The profiler options and counters can refer to "Compute Command Line Profiler User Guide". + + Raises: + ValueError: If `output_mode` is not in ['kvp', 'csv']. + + Examples: + + .. code-block:: python + + import paddle.fluid as fluid + import paddle.fluid.profiler as profiler + + epoc = 8 + dshape = [4, 3, 28, 28] + data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32') + conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1]) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + output_file = 'cuda_profiler.txt' + with profiler.cuda_profiler(output_file, 'csv') as nvprof: + for i in range(epoc): + input = np.random.random(dshape).astype('float32') + exe.run(fluid.default_main_program(), feed={'data': input}) + # then use NVIDIA Visual Profiler (nvvp) to load this output file + # to visualize results. """ if output_mode is None: output_mode = 'csv' @@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None): def reset_profiler(): - """The profiler clear interface. - reset_profiler will clear the previous time record. + """ + Clear the previous time record. This interface does not work for + `fluid.profiler.cuda_profiler`, it only works for + `fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`, + and `fluid.profiler.profiler`. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + with profiler.profiler(state, 'total', '/tmp/profile'): + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # ... """ core.reset_profiler() def start_profiler(state): - """Enable the profiler. + """ + Enable the profiler. Uers can use `fluid.profiler.start_profiler` and + `fluid.profiler.stop_profiler` to insert the code, except the usage of + `fluid.profiler.profiler` interface. Args: state (string) : The profiling state, which should be 'CPU', 'GPU' or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling GPU as well. 'All' also generates timeline. + + Raises: + ValueError: If `state` is not in ['CPU', 'GPU', 'All']. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + + profiler.start_profiler('GPU') + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # except each iteration + profiler.stop_profiler('total', '/tmp/profile') """ if core.is_profiler_enabled(): return @@ -97,7 +160,10 @@ def start_profiler(state): def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): - """Stop the profiler. + """ + Stop the profiler. Uers can use `fluid.profiler.start_profiler` and + `fluid.profiler.stop_profiler` to insert the code, except the usage of + `fluid.profiler.profiler` interface. Args: sorted_key (string) : If None, the profiling results will be printed @@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'): The `ave` means sorting by the average execution time. profile_path (string) : If state == 'All', it will write a profile proto output file. + + Raises: + ValueError: If `sorted_key` is not in + ['calls', 'total', 'max', 'min', 'ave']. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + + profiler.start_profiler('GPU') + for iter in range(10): + if iter == 2: + profiler.reset_profiler() + # except each iteration + profiler.stop_profiler('total', '/tmp/profile') """ if not core.is_profiler_enabled(): return @@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): Different from cuda_profiler, this profiler can be used to profile both CPU and GPU program. By defalut, it records the CPU and GPU operator kernels, if you want to profile other program, you can refer the profiling tutorial - to add more records. + to add more records in C++ code. + + If the state == 'All', a profile proto file will be written to + `profile_path`. This file records timeline information during the execution. + Then users can visualize this file to see the timeline, please refer + https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md Args: state (string) : The profiling state, which should be 'CPU' or 'GPU', @@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'): The `ave` means sorting by the average execution time. profile_path (string) : If state == 'All', it will write a profile proto output file. + + Raises: + ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is + not in ['calls', 'total', 'max', 'min', 'ave']. + + Examples: + + .. code-block:: python + + import paddle.fluid.profiler as profiler + + with profiler.profiler('All', 'total', '/tmp/profile') as prof: + for pass_id in range(pass_num): + for batch_id, data in enumerate(train_reader()): + exe.run(fluid.default_main_program(), + feed=feeder.feed(data), + fetch_list=[], + use_program_cache=True) + # ... """ start_profiler(state) yield diff --git a/python/paddle/fluid/recordio_writer.py b/python/paddle/fluid/recordio_writer.py index 8d48e9abef..bd57772713 100644 --- a/python/paddle/fluid/recordio_writer.py +++ b/python/paddle/fluid/recordio_writer.py @@ -36,6 +36,45 @@ def convert_reader_to_recordio_file( compressor=core.RecordIOWriter.Compressor.Snappy, max_num_records=1000, feed_order=None): + """ + Convert a Python Reader to a recordio file. + + Please see :ref:`api_guide_python_reader` and :ref:`api_guide_reader_op` for + details. + + Examples: + + >>> import paddle.fluid as fluid + >>> import paddle.dataset.mnist as mnist + >>> import paddle + >>> + >>> tmp_program = fluid.Program() + >>> with fluid.program_guard(tmp_program): + >>> img = fluid.layers.data(name='img', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace()) + >>> # mnist.recordio will be generated in current directory + >>> fluid.recordio_writer.convert_reader_to_recordio_file( + >>> filename="mnist.recordio", + >>> reader_creator=paddle.batch(mnist.train(), batch_size=32), + >>> feeder=feeder) + + Args: + filename(str): The recordio filename. + reader_creator(callable): The Python Reader Creator. See + :ref:`api_guide_python_reader`. + feeder(DataFeeder): The DataFeeder instance. Used to convert + :code:`reader_creator` to :code: `lod_tensor` + compressor: Must in fluid.core.RecordIOWriter.Compressor.Snappy or + fluid.core.RecordIOWriter.Compressor.NoCompress. Use :code:`Snappy` + by default. + max_num_records(int): Maximum number of records in one chuck. Each record + is each return value from reader function + feed_order(list): The order of variable names that the reader returns + + Returns: + int: the number of record that saved. + """ if feed_order is None: feed_order = feeder.feed_names counter = 0 @@ -58,6 +97,17 @@ def convert_reader_to_recordio_files( compressor=core.RecordIOWriter.Compressor.Snappy, max_num_records=1000, feed_order=None): + """ + convert a python reader to many recordio files. + + This API is basically same as :code:`convert_reader_to_recordio_file`, + instead of it will create many recordio files. Each file contains at + most :code:`batch_per_file` records. + + Please reference + :ref:`api_fluid_recordio_writer_convert_reader_to_recordio_file` for more + details. + """ if feed_order is None: feed_order = feeder.feed_names f_name, f_ext = os.path.splitext(filename) diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index c4d6829599..dac474d5ee 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -16,8 +16,8 @@ import framework from . import core __all__ = [ - 'append_regularization_ops', 'WeightDecayRegularizer', 'L1Decay', 'L2Decay', - 'L1DecayRegularizer', 'L2DecayRegularizer' + 'append_regularization_ops', 'L1Decay', 'L2Decay', 'L1DecayRegularizer', + 'L2DecayRegularizer' ] @@ -36,7 +36,8 @@ def append_regularization_ops(parameters_and_grads, regularization=None): set. It will be applied with regularizer. Returns: - list of (parameters, gradients) pair with the regularized gradient + list[(Variable, Variable)]: list of (parameters, gradients) \ + pair with the regularized gradient Raises: Exception: Unknown regularization type @@ -100,6 +101,24 @@ class WeightDecayRegularizer(object): class L2DecayRegularizer(WeightDecayRegularizer): """Implements the L2 Weight Decay Regularization + + Small values of L2 can help prevent over fitting the training data. + + .. math:: + + L2WeightDecay = reg\_coeff * parameter + + Args: + regularization_coeff(float): regularization coeff + + Examples: + .. code-block:: python + + optimizer = fluid.optimizer.Adagrad( + learning_rate=1e-4, + regularization=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.1)) + optimizer.minimize(avg_cost) """ def __init__(self, regularization_coeff=0.0): @@ -154,6 +173,27 @@ class L2DecayRegularizer(WeightDecayRegularizer): class L1DecayRegularizer(WeightDecayRegularizer): """Implements the L1 Weight Decay Regularization + + L1 regularization encourages sparsity. + + .. math:: + + L1WeightDecay = reg\_coeff * sign(parameter) + + Args: + regularization_coeff(float): regularization coeff + + Examples: + .. code-block:: python + + program = fluid.framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + regularizer=fluid.regularizer.L1DecayRegularizer(0.5)) """ def __init__(self, regularization_coeff=0.0): diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index 0ccb3a39e0..67aa21e8c5 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -206,35 +206,35 @@ def infer(use_cuda, inference_program, params_dirname): inferencer = fluid.Inferencer( inference_program, param_path=params_dirname, place=place) - # Setup inputs by creating LoDTensors to represent sequences of words. - # Here each word is the basic element of these LoDTensors and the shape of + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensors will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_n2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_n1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_0 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_p1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) ctx_p2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=WORD_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=WORD_DICT_LEN - 1) pred = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=PRED_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=PRED_DICT_LEN - 1) mark = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=MARK_DICT_LEN - 1) + recursive_seq_lens, base_shape, place, low=0, high=MARK_DICT_LEN - 1) results = inferencer.infer( { diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py index c4b37df3a0..8becd2404b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py @@ -127,9 +127,19 @@ def decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape( + pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -138,10 +148,14 @@ def decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) # return init_ids, init_scores @@ -215,11 +229,13 @@ def decode_main(use_cuda, is_sparse): [1. for _ in range(batch_size)], dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) - init_lod = [1] * batch_size - init_lod = [init_lod, init_lod] + init_recursive_seq_lens = [1] * batch_size + init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens] - init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) - init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) + init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens, + place) + init_scores = fluid.create_lod_tensor(init_scores_data, + init_recursive_seq_lens, place) train_data = paddle.batch( paddle.reader.shuffle( @@ -243,7 +259,7 @@ def decode_main(use_cuda, is_sparse): feed=feed_dict, fetch_list=[translation_ids, translation_scores], return_numpy=False) - print result_ids.lod() + print result_ids.recursive_sequence_lengths() break diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 090c11ce1e..c860f16417 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -209,13 +209,15 @@ def infer(use_cuda, inference_program, params_dirname): inference_program, param_path=params_dirname, place=place) # Use the first data from paddle.dataset.movielens.test() as input. - # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor, - # where `data` is a list of sequences of index numbers, `lod` is - # the level of detail (lod) info associated with `data`. + # Use create_lod_tensor(data, recursive_sequence_lengths, place) API + # to generate LoD Tensor where `data` is a list of sequences of index + # numbers, `recursive_sequence_lengths` is the length-based level of detail + # (lod) info associated with `data`. # For example, data = [[10, 2, 3], [2, 3]] means that it contains # two sequences of indexes, of length 3 and 2, respectively. - # Correspondingly, lod = [[3, 2]] contains one level of detail info, - # indicating that `data` consists of two sequences of length 3 and 2. + # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one + # level of detail info, indicating that `data` consists of two sequences + # of length 3 and 2, respectively. user_id = fluid.create_lod_tensor([[1]], [[1]], place) gender_id = fluid.create_lod_tensor([[1]], [[1]], place) age_id = fluid.create_lod_tensor([[0]], [[1]], place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 9b61f7a00c..1668ae83d3 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -128,17 +128,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index aa7c567b4d..8da89d82cb 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -143,17 +143,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index 8c74be0f08..74faa2e8aa 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -138,17 +138,17 @@ def infer(use_cuda, inference_program, params_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=len(word_dict) - 1) + recursive_seq_lens, base_shape, place, low=0, high=len(word_dict) - 1) results = inferencer.infer({'words': tensor_words}) print("infer results: ", results) diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index ba44f72d9b..02e65cf56c 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -124,21 +124,22 @@ def infer(use_cuda, inference_program, params_dirname=None): # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word # is simply an index to look up for the corresponding word vector and hence - # the shape of word (base_shape) should be [1]. The length-based level of - # detail (lod) info of each LoDtensor should be [[1]] meaning there is only - # one lod_level and there is only one sequence of one word on this level. - # Note that lod info should be a list of lists. - lod = [[1]] + # the shape of word (base_shape) should be [1]. The recursive_sequence_lengths, + # which is length-based level of detail (lod) of each LoDTensor, should be [[1]] + # meaning there is only one level of detail and there is only one sequence of + # one word on this level. + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[1]] base_shape = [1] # The range of random integers is [low, high] first_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) second_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) third_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) fourth_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) result = inferencer.infer( { diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index c6687e8ad7..1df7b99aad 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -194,16 +194,16 @@ def train(word_dict, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -238,17 +238,21 @@ def infer(word_dict, use_cuda, save_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] tensor_words = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -257,7 +261,7 @@ def infer(word_dict, use_cuda, save_dirname=None): feed={feed_target_names[0]: tensor_words}, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) print("Inference results: ", np_data) diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index b1a6b524d3..71bf5f8b3a 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -110,14 +110,23 @@ def infer(use_cuda, save_dirname=None): # The input's dimension should be 2-D and the second dim is 13 # The input data should be >= 0 batch_size = 10 - tensor_x = numpy.random.uniform(0, 10, - [batch_size, 13]).astype("float32") + + test_reader = paddle.batch( + paddle.dataset.uci_housing.test(), batch_size=batch_size) + + test_data = test_reader().next() + test_feat = numpy.array( + [data[0] for data in test_data]).astype("float32") + test_label = numpy.array( + [data[1] for data in test_data]).astype("float32") + assert feed_target_names[0] == 'x' results = exe.run(inference_program, - feed={feed_target_names[0]: tensor_x}, + feed={feed_target_names[0]: numpy.array(test_feat)}, fetch_list=fetch_targets) print("infer shape: ", results[0].shape) print("infer results: ", results[0]) + print("ground truth: ", test_label) def main(use_cuda, is_local=True): diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 0f3a4c9242..a2fb186b86 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 99d51ae007..d489feae9c 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -247,35 +247,67 @@ def infer(use_cuda, save_dirname=None): [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) - # Setup inputs by creating LoDTensors to represent sequences of words. - # Here each word is the basic element of these LoDTensors and the shape of + # Setup input by creating LoDTensor to represent sequence of words. + # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[3, 4, 2]], - # which has only one lod level. Then the created LoDTensors will have only + # Suppose the recursive_sequence_lengths info is set to [[3, 4, 2]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for three sentences of # length 3, 4 and 2, respectively. - # Note that lod info should be a list of lists. - lod = [[3, 4, 2]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[3, 4, 2]] base_shape = [1] # The range of random integers is [low, high] word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) pred = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=pred_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=pred_dict_len - 1) ctx_n2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_n1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_0 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_p1 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) ctx_p2 = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=word_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=word_dict_len - 1) mark = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=mark_dict_len - 1) + recursive_seq_lens, + base_shape, + place, + low=0, + high=mark_dict_len - 1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -301,7 +333,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) diff --git a/python/paddle/fluid/tests/book/test_machine_translation.py b/python/paddle/fluid/tests/book/test_machine_translation.py index 23e5900f12..90c301a661 100644 --- a/python/paddle/fluid/tests/book/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/test_machine_translation.py @@ -108,7 +108,7 @@ def decoder_decode(context, is_sparse): pre_state = pd.array_read(array=state_array, i=counter) pre_score = pd.array_read(array=scores_array, i=counter) - # expand the lod of pre_state to be the same with pre_score + # expand the recursive_sequence_lengths of pre_state to be the same with pre_score pre_state_expanded = pd.sequence_expand(pre_state, pre_score) pre_ids_emb = pd.embedding( @@ -126,9 +126,19 @@ def decoder_decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=50) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape( + pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -137,10 +147,14 @@ def decoder_decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) # return init_ids, init_scores @@ -200,16 +214,16 @@ def train_main(use_cuda, is_sparse, is_local=True): if is_local: train_loop(framework.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -238,11 +252,13 @@ def decode_main(use_cuda, is_sparse): [1. for _ in range(batch_size)], dtype='float32') init_ids_data = init_ids_data.reshape((batch_size, 1)) init_scores_data = init_scores_data.reshape((batch_size, 1)) - init_lod = [1] * batch_size - init_lod = [init_lod, init_lod] + init_recursive_seq_lens = [1] * batch_size + init_recursive_seq_lens = [init_recursive_seq_lens, init_recursive_seq_lens] - init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) - init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) + init_ids = fluid.create_lod_tensor(init_ids_data, init_recursive_seq_lens, + place) + init_scores = fluid.create_lod_tensor(init_scores_data, + init_recursive_seq_lens, place) train_data = paddle.batch( paddle.reader.shuffle( @@ -266,7 +282,7 @@ def decode_main(use_cuda, is_sparse): feed=feed_dict, fetch_list=[translation_ids, translation_scores], return_numpy=False) - print result_ids.lod() + print result_ids.recursive_sequence_lengths() break diff --git a/python/paddle/fluid/tests/book/test_recognize_digits.py b/python/paddle/fluid/tests/book/test_recognize_digits.py index 578b1162fb..5f5c8544bb 100644 --- a/python/paddle/fluid/tests/book/test_recognize_digits.py +++ b/python/paddle/fluid/tests/book/test_recognize_digits.py @@ -94,7 +94,7 @@ def train(nn_type, test_program = fluid.default_main_program().clone(for_test=True) - optimizer = fluid.optimizer.Adam(learning_rate=0.001) + optimizer = fluid.optimizer.Adam(learning_rate=0.001, LARS_weight_decay=0.3) optimizer.minimize(avg_loss) place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() @@ -151,16 +151,16 @@ def train(nn_type, if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 65d6552acc..6548766ef5 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -260,13 +260,15 @@ def infer(use_cuda, save_dirname=None): # Use the first data from paddle.dataset.movielens.test() as input assert feed_target_names[0] == "user_id" - # Use create_lod_tensor(data, lod, place) API to generate LoD Tensor - # where `data` is a list of sequences of index numbers, `lod` is - # the level of detail (lod) info associated with `data`. + # Use create_lod_tensor(data, recursive_sequence_lengths, place) API + # to generate LoD Tensor where `data` is a list of sequences of index + # numbers, `recursive_sequence_lengths` is the length-based level of detail + # (lod) info associated with `data`. # For example, data = [[10, 2, 3], [2, 3]] means that it contains # two sequences of indexes, of length 3 and 2, respectively. - # Correspondingly, lod = [[3, 2]] contains one level of detail info, - # indicating that `data` consists of two sequences of length 3 and 2. + # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one + # level of detail info, indicating that `data` consists of two sequences + # of length 3 and 2, respectively. user_id = fluid.create_lod_tensor([[1]], [[1]], place) assert feed_target_names[1] == "gender_id" diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 7ada57def6..4672826241 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -216,19 +216,19 @@ def infer(use_cuda, save_dirname=None): # Here each word is the basic element of the LoDTensor and the shape of # each word (base_shape) should be [1] since it is simply an index to # look up for the corresponding word vector. - # Suppose the length_based level of detail (lod) info is set to [[4, 6]], - # which has only one lod level. Then the created LoDTensor will have only + # Suppose the recursive_sequence_lengths info is set to [[4, 6]], + # which has only one level of detail. Then the created LoDTensor will have only # one higher level structure (sequence of words, or sentence) than the basic # element (word). Hence the LoDTensor will hold data for two sentences of # length 4 and 6, respectively. - # Note that lod info should be a list of lists. - lod = [[4, 6]] + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[4, 6]] base_shape = [1] # The range of random integers is [low, high] word_data = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=1) + recursive_seq_lens, base_shape, place, low=0, high=1) trg_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=1) + recursive_seq_lens, base_shape, place, low=0, high=1) # Construct feed as a dictionary of {feed_target_name: feed_target_data} # and results will contain a list of data corresponding to fetch_targets. @@ -241,7 +241,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference shape: ", np_data.shape) print("Inference results: ", np_data) diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index 3118d88701..49bd72c7a5 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True): if is_local: train_loop(fluid.default_main_program()) else: - port = os.getenv("PADDLE_INIT_PORT", "6174") - pserver_ips = os.getenv("PADDLE_INIT_PSERVERS") # ip,ip... + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS") # ip,ip... eplist = [] for ip in pserver_ips.split(","): eplist.append(':'.join([ip, port])) pserver_endpoints = ",".join(eplist) # ip:port,ip:port... - trainers = int(os.getenv("TRAINERS")) + trainers = int(os.getenv("PADDLE_TRAINERS")) current_endpoint = os.getenv("POD_IP") + ":" + port - trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID")) - training_role = os.getenv("TRAINING_ROLE", "TRAINER") + trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") t = fluid.DistributeTranspiler() t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": @@ -168,21 +168,22 @@ def infer(use_cuda, save_dirname=None): # Setup inputs by creating 4 LoDTensors representing 4 words. Here each word # is simply an index to look up for the corresponding word vector and hence - # the shape of word (base_shape) should be [1]. The length-based level of - # detail (lod) info of each LoDtensor should be [[1]] meaning there is only - # one lod_level and there is only one sequence of one word on this level. - # Note that lod info should be a list of lists. - lod = [[1]] + # the shape of word (base_shape) should be [1]. The recursive_sequence_lengths, + # which is length-based level of detail (lod) of each LoDTensor, should be [[1]] + # meaning there is only one level of detail and there is only one sequence of + # one word on this level. + # Note that recursive_sequence_lengths should be a list of lists. + recursive_seq_lens = [[1]] base_shape = [1] # The range of random integers is [low, high] first_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) second_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) third_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) fourth_word = fluid.create_random_int_lodtensor( - lod, base_shape, place, low=0, high=dict_size - 1) + recursive_seq_lens, base_shape, place, low=0, high=dict_size - 1) assert feed_target_names[0] == 'firstw' assert feed_target_names[1] == 'secondw' @@ -200,7 +201,7 @@ def infer(use_cuda, save_dirname=None): }, fetch_list=fetch_targets, return_numpy=False) - print(results[0].lod()) + print(results[0].recursive_sequence_lengths()) np_data = np.array(results[0]) print("Inference Shape: ", np_data.shape) diff --git a/python/paddle/fluid/tests/test_data_feeder.py b/python/paddle/fluid/tests/test_data_feeder.py index ce3ba3ebc5..30b7a634a2 100644 --- a/python/paddle/fluid/tests/test_data_feeder.py +++ b/python/paddle/fluid/tests/test_data_feeder.py @@ -22,12 +22,11 @@ class TestDataFeeder(unittest.TestCase): label = fluid.layers.data(name='label', shape=[1], dtype='int64') feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) - print(result) self.assertEqual(result['image'].shape(), [2, 1, 28, 28]) self.assertEqual(result['label'].shape(), [2, 1]) - self.assertEqual(result['image'].lod(), []) - self.assertEqual(result['label'].lod(), []) + self.assertEqual(result['image'].recursive_sequence_lengths(), []) + self.assertEqual(result['label'].recursive_sequence_lengths(), []) def test_lod_level_1_converter(self): # lod_level = 1 @@ -42,12 +41,12 @@ class TestDataFeeder(unittest.TestCase): # label = [1] * len(data) result = feeder.feed( [([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])]) - print(result) self.assertEqual(result['sentences'].shape(), [9, 1]) self.assertEqual(result['label'].shape(), [3, 1]) - self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]]) - self.assertEqual(result['label'].lod(), []) + self.assertEqual(result['sentences'].recursive_sequence_lengths(), + [[3, 2, 4]]) + self.assertEqual(result['label'].recursive_sequence_lengths(), []) def test_lod_level_2_converter(self): # lod_level = 2 @@ -62,12 +61,12 @@ class TestDataFeeder(unittest.TestCase): # label = [1] * len(data) result = feeder.feed( [([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])]) - print(result) self.assertEqual(result['paragraphs'].shape(), [9, 1]) self.assertEqual(result['label'].shape(), [2, 1]) - self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]]) - self.assertEqual(result['label'].lod(), []) + self.assertEqual(result['paragraphs'].recursive_sequence_lengths(), + [[2, 1], [3, 2, 4]]) + self.assertEqual(result['label'].recursive_sequence_lengths(), []) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 8569d838bd..2d70c986b1 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -127,6 +127,24 @@ class TestPriorBox(unittest.TestCase): assert box.shape[3] == 4 +class TestAnchorGenerator(unittest.TestCase): + def test_anchor_generator(self): + data_shape = [3, 224, 224] + images = fluid.layers.data( + name='pixel', shape=data_shape, dtype='float32') + conv1 = fluid.layers.conv2d(images, 3, 3, 2) + anchor, var = fluid.layers.anchor_generator( + input=conv1, + anchor_sizes=[64, 128, 256, 512], + aspect_ratios=[0.5, 1.0, 2.0], + variance=[0.1, 0.1, 0.2, 0.2], + stride=[16.0, 16.0], + offset=0.5) + assert len(anchor.shape) == 4 + assert anchor.shape == var.shape + assert anchor.shape[3] == 4 + + class TestMultiBoxHead(unittest.TestCase): def test_multi_box_head(self): data_shape = [3, 224, 224] diff --git a/python/paddle/fluid/tests/test_mnist_if_else_op.py b/python/paddle/fluid/tests/test_if_else_op.py similarity index 66% rename from python/paddle/fluid/tests/test_mnist_if_else_op.py rename to python/paddle/fluid/tests/test_if_else_op.py index d34f52db5f..1b58925599 100644 --- a/python/paddle/fluid/tests/test_mnist_if_else_op.py +++ b/python/paddle/fluid/tests/test_if_else_op.py @@ -14,10 +14,11 @@ import paddle import paddle.fluid.layers as layers -from paddle.fluid.framework import Program, program_guard, default_main_program, default_startup_program +from paddle.fluid.framework import Program, program_guard from paddle.fluid.executor import Executor from paddle.fluid.optimizer import MomentumOptimizer import paddle.fluid.core as core +import paddle.fluid as fluid import unittest import numpy as np @@ -31,14 +32,13 @@ class TestMNISTIfElseOp(unittest.TestCase): label = layers.data(name='y', shape=[1], dtype='int64') - limit = layers.fill_constant_batch_size_like( - input=label, dtype='int64', shape=[1], value=5.0) + limit = layers.fill_constant(shape=[1], dtype='int64', value=5) cond = layers.less_than(x=label, y=limit) true_image, false_image = layers.split_lod_tensor( input=image, mask=cond) true_out = layers.create_tensor(dtype='float32') - true_cond = layers.ConditionalBlock([true_image]) + true_cond = layers.ConditionalBlock([cond]) with true_cond.block(): hidden = layers.fc(input=true_image, size=100, act='tanh') @@ -46,7 +46,7 @@ class TestMNISTIfElseOp(unittest.TestCase): layers.assign(input=prob, output=true_out) false_out = layers.create_tensor(dtype='float32') - false_cond = layers.ConditionalBlock([false_image]) + false_cond = layers.ConditionalBlock([cond]) with false_cond.block(): hidden = layers.fc(input=false_image, size=200, act='tanh') @@ -64,7 +64,7 @@ class TestMNISTIfElseOp(unittest.TestCase): train_reader = paddle.batch( paddle.reader.shuffle( paddle.dataset.mnist.train(), buf_size=8192), - batch_size=200) + batch_size=10) place = core.CPUPlace() exe = Executor(place) @@ -94,8 +94,7 @@ class TestMNISTIfElseOp(unittest.TestCase): label = layers.data(name='y', shape=[1], dtype='int64') - limit = layers.fill_constant_batch_size_like( - input=label, dtype='int64', shape=[1], value=5.0) + limit = layers.fill_constant(shape=[1], dtype='int64', value=5) cond = layers.less_than(x=label, y=limit) ie = layers.IfElse(cond) @@ -125,7 +124,7 @@ class TestMNISTIfElseOp(unittest.TestCase): place = core.CPUPlace() exe = Executor(place) - exe.run(kwargs['startup_program']) + exe.run(startup_prog) PASS_NUM = 100 for pass_id in range(PASS_NUM): for data in train_reader(): @@ -133,7 +132,7 @@ class TestMNISTIfElseOp(unittest.TestCase): y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = y_data.reshape((y_data.shape[0], 1)) - outs = exe.run(kwargs['main_program'], + outs = exe.run(prog, feed={'x': x_data, 'y': y_data}, fetch_list=[avg_loss]) @@ -143,6 +142,67 @@ class TestMNISTIfElseOp(unittest.TestCase): self.assertFalse(True) +class TestIfElse(unittest.TestCase): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = 0.5 + self.data = np.random.rand(25, 1).astype(np.float32) + + def compare_ifelse_op_and_numpy(self, place): + self.set_test_case() + + prog = Program() + startup_prog = Program() + with program_guard(prog, startup_prog): + src = layers.data(name='data', shape=[1], dtype='float32') + cond = layers.fill_constant( + [1], dtype='float32', value=self.cond_value) + ifcond = layers.less_than(x=src, y=cond) + ie = layers.IfElse(ifcond) + with ie.true_block(): + true_target = ie.input(src) + ie.output(true_target) + + with ie.false_block(): + false_target = ie.input(src) + ie.output(false_target) + if_out = ie() + out = layers.reduce_sum(if_out) + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + fetch_list = [out] + o1, = exe.run(fluid.default_main_program(), + feed={'data': self.data}, + fetch_list=[out]) + o2 = np.sum(self.data) + self.assertTrue( + np.allclose( + o1, o2, atol=1e-8), + "IfElse result : " + str(o1) + "\n Numpy result :" + str(o2)) + + def test_cpu(self): + self.compare_ifelse_op_and_numpy(fluid.CPUPlace()) + + def test_cuda(self): + if not core.is_compiled_with_cuda(): + return + self.compare_ifelse_op_and_numpy(fluid.CUDAPlace(0)) + + +class TestIfElseTrueBranch(TestIfElse): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = 10. + self.data = np.random.rand(25, 1).astype(np.float32) + + +class TestIfElseFalseBranch(TestIfElse): + def set_test_case(self): + # condiction is: self.data < self.cond_value + self.cond_value = -10. + self.data = np.random.rand(25, 1).astype(np.float32) + + if __name__ == '__main__': - # temp disable if else unittest since it could be buggy. - exit(0) + unittest.main() diff --git a/python/paddle/fluid/tests/test_lod_tensor.py b/python/paddle/fluid/tests/test_lod_tensor.py index 013d72f418..f7a9dd4129 100644 --- a/python/paddle/fluid/tests/test_lod_tensor.py +++ b/python/paddle/fluid/tests/test_lod_tensor.py @@ -13,77 +13,85 @@ # limitations under the License. import paddle.fluid as fluid -from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor, _validate_lod, _convert_lod -import numpy +from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor +import numpy as np import unittest class TestLoDTensor(unittest.TestCase): - def test_validate_lod(self): - lod = (1, 2, 1) - self.assertRaises(AssertionError, _validate_lod, lod, -1) - lod = [[1, 2], (2, 3)] - self.assertRaises(AssertionError, _validate_lod, lod, -1) - lod = [1, 2, 3] - self.assertRaises(AssertionError, _validate_lod, lod, -1) + def test_pybind_recursive_seq_lens(self): + tensor = fluid.LoDTensor() + recursive_seq_lens = [] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + recursive_seq_lens = [[], [1], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) + recursive_seq_lens = [[0], [2], [3]] + self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, + recursive_seq_lens) - lod = [] - self.assertTrue(_validate_lod(lod, -1)) - lod = [[], [1], [3]] - self.assertFalse(_validate_lod(lod, -1)) - lod = [[0], [-1], [3]] - self.assertFalse(_validate_lod(lod, -1)) + recursive_seq_lens = [[1, 2, 3]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) + tensor.set(np.random.random([6, 1]), fluid.CPUPlace()) + self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) + tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) + self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) # Each level's sum should be equal to the number of items in the next level # Moreover, last level's sum should be equal to the tensor height - lod = [[2, 3], [1, 3, 1, 2, 1]] - self.assertTrue(_validate_lod(lod, tensor_height=8)) - lod = [[1, 3], [2, 1, 3]] - self.assertFalse(_validate_lod(lod, tensor_height=6)) - lod = [[1, 3], [2, 1, 3, 4]] - self.assertFalse(_validate_lod(lod, tensor_height=5)) - - def test_convert_lod(self): - lod = [[1, 2, 3]] - converted_lod = [[0, 1, 3, 6]] - self.assertEqual(_convert_lod(lod), converted_lod) - - lod = [[2, 3], [1, 3, 1, 2, 1]] - converted_lod = [[0, 2, 5], [0, 1, 4, 5, 7, 8]] - self.assertEqual(_convert_lod(lod), converted_lod) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 2]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) + tensor.set(np.random.random([8, 1]), fluid.CPUPlace()) + self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) + recursive_seq_lens = [[2, 3], [1, 3, 1, 2, 1]] + tensor.set_recursive_sequence_lengths(recursive_seq_lens) + self.assertTrue(tensor.has_valid_recursive_sequence_lengths()) + tensor.set(np.random.random([9, 1]), fluid.CPUPlace()) + self.assertFalse(tensor.has_valid_recursive_sequence_lengths()) def test_create_lod_tensor(self): # Create LoDTensor from a list data = [[1, 2, 3], [3, 4]] - wrong_lod = [[2, 2]] - correct_lod = [[3, 2]] - self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, - fluid.CPUPlace()) - tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) - self.assertEqual(tensor.lod(), [[0, 3, 5]]) + wrong_recursive_seq_lens = [[2, 2]] + correct_recursive_seq_lens = [[3, 2]] + self.assertRaises(AssertionError, create_lod_tensor, data, + wrong_recursive_seq_lens, fluid.CPUPlace()) + tensor = create_lod_tensor(data, correct_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + correct_recursive_seq_lens) # Create LoDTensor from numpy array - data = numpy.random.random([10, 1]) - lod = [[2, 1], [3, 3, 4]] - tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) - self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) + data = np.random.random([10, 1]) + recursive_seq_lens = [[2, 1], [3, 3, 4]] + tensor = create_lod_tensor(data, recursive_seq_lens, fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) # Create LoDTensor from another LoDTensor, they are differnt instances - new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] - new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) - self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) - self.assertEqual(new_tensor.lod(), [[0, 2, 4, 5], [0, 1, 3, 5, 8, 10]]) + new_recursive_seq_lens = [[2, 2, 1], [1, 2, 2, 3, 2]] + new_tensor = create_lod_tensor(tensor, new_recursive_seq_lens, + fluid.CPUPlace()) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) + self.assertEqual(new_tensor.recursive_sequence_lengths(), + new_recursive_seq_lens) def test_create_random_int_lodtensor(self): # The shape of a word, commonly used in speech and NLP problem, is [1] shape = [1] - lod = [[2, 3, 5]] + recursive_seq_lens = [[2, 3, 5]] dict_size = 10000 low = 0 high = dict_size - 1 - tensor = create_random_int_lodtensor(lod, shape, + tensor = create_random_int_lodtensor(recursive_seq_lens, shape, fluid.CPUPlace(), low, high) - self.assertEqual(tensor.lod(), [[0, 2, 5, 10]]) + self.assertEqual(tensor.recursive_sequence_lengths(), + recursive_seq_lens) self.assertEqual(tensor.shape(), [10, 1]) diff --git a/python/paddle/fluid/tests/unittests/.gitignore b/python/paddle/fluid/tests/unittests/.gitignore index 3538a9c200..b1e8fda03a 100644 --- a/python/paddle/fluid/tests/unittests/.gitignore +++ b/python/paddle/fluid/tests/unittests/.gitignore @@ -4,3 +4,5 @@ mnist_1.recordio mnist_2.recordio flowers.recordio wmt16.recordio +data_balance_test.recordio +data_balance_with_lod_test.recordio diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 21182393bd..f6c8dcabcb 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -15,7 +15,7 @@ if(NOT WITH_DISTRIBUTE) endif(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_OPS test_seq_concat_op) # FIXME(helin): https://github.com/PaddlePaddle/Paddle/issues/8290 -list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 +list(REMOVE_ITEM TEST_OPS test_modified_huber_loss_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5184 list(REMOVE_ITEM TEST_OPS test_lstm_unit_op) # # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/5185 list(REMOVE_ITEM TEST_OPS test_nce) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/7778 list(REMOVE_ITEM TEST_OPS test_recurrent_op) # FIXME(qijun) https://github.com/PaddlePaddle/Paddle/issues/6152 @@ -43,8 +43,6 @@ list(REMOVE_ITEM TEST_OPS test_warpctc_op) list(REMOVE_ITEM TEST_OPS test_dist_train) list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) -# TODO(wuyi): this test hungs on CI, will add it back later -list(REMOVE_ITEM TEST_OPS test_listen_and_serv_op) foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) @@ -52,3 +50,6 @@ py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=$ py_test_modules(test_dist_train MODULES test_dist_train SERIAL) py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SERIAL) py_test_modules(test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL) +set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20) +set_tests_properties(test_dist_mnist PROPERTIES TIMEOUT 180) +set_tests_properties(test_dist_word2vec PROPERTIES TIMEOUT 180) diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 307caae4b0..e056ef9952 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -162,7 +162,7 @@ class OpTest(unittest.TestCase): tensor = core.LoDTensor() if isinstance(np_value, tuple): tensor.set(np_value[0], place) - tensor.set_lod(np_value[1]) + tensor.set_recursive_sequence_lengths(np_value[1]) else: tensor.set(np_value, place) feed_map[name] = tensor @@ -170,7 +170,8 @@ class OpTest(unittest.TestCase): tensor = core.LoDTensor() if isinstance(self.inputs[var_name], tuple): tensor.set(self.inputs[var_name][0], place) - tensor.set_lod(self.inputs[var_name][1]) + tensor.set_recursive_sequence_lengths(self.inputs[var_name][ + 1]) else: tensor.set(self.inputs[var_name], place) feed_map[var_name] = tensor @@ -293,7 +294,8 @@ class OpTest(unittest.TestCase): str(place)) if isinstance(expect, tuple): self.assertListEqual( - actual.lod(), expect[1], "Output (" + sub_out_name + + actual.recursive_sequence_lengths(), expect[1], + "Output (" + sub_out_name + ") has different lod at " + str(place)) else: idx = find_actual(out_name, fetch_list) @@ -307,8 +309,8 @@ class OpTest(unittest.TestCase): "Output (" + out_name + ") has diff at " + str(place) + str(actual_t) + "\n" + str(expect_t)) if isinstance(expect, tuple): - self.assertListEqual(actual.lod(), expect[1], - "Output (" + out_name + + self.assertListEqual(actual.recursive_sequence_lengths(), + expect[1], "Output (" + out_name + ") has different lod at " + str(place)) def _get_places(self): @@ -408,7 +410,7 @@ class OpTest(unittest.TestCase): tensor = core.LoDTensor() tensor.set(np_value, place) if lod is not None: - tensor.set_lod(lod) + tensor.set_recursive_sequence_lengths(lod) return tensor @staticmethod diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index 829c5a1a5f..cddf00765f 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -18,6 +18,8 @@ import unittest import paddle.fluid as fluid import time import numpy as np +import math +import sys __all__ = ['TestParallelExecutorBase'] @@ -81,7 +83,6 @@ class TestParallelExecutorBase(unittest.TestCase): begin = time.time() first_loss, = run_executor( exe=exe, feed=feed_dict, fetch_list=[loss.name]) - first_loss = np.array(first_loss) for i in xrange(iter): run_executor(exe=exe, feed=feed_dict, fetch_list=[]) @@ -94,7 +95,11 @@ class TestParallelExecutorBase(unittest.TestCase): print "%.4f Instance per second" % ( (batch_size * iter + 2) / (end - begin)) - last_loss = np.array(last_loss) + avg_last_loss_val = np.array(last_loss).mean() + avg_first_loss_val = np.array(first_loss).mean() + if math.isnan(float(avg_last_loss_val)) or math.isnan( + float(avg_first_loss_val)): + sys.exit("got NaN loss, training failed.") print first_loss, last_loss # self.assertGreater(first_loss[0], last_loss[0]) diff --git a/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py new file mode 100644 index 0000000000..9c7d5d41f0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_anchor_generator_op.py @@ -0,0 +1,110 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://w_idxw.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +import sys +import math +from op_test import OpTest + + +def anchor_generator_in_python(input_feat, anchor_sizes, aspect_ratios, + variances, stride, offset): + num_anchors = len(aspect_ratios) * len(anchor_sizes) + layer_h = input_feat.shape[2] + layer_w = input_feat.shape[3] + out_dim = (layer_h, layer_w, num_anchors, 4) + out_anchors = np.zeros(out_dim).astype('float32') + + for h_idx in range(layer_h): + for w_idx in range(layer_w): + x_ctr = (w_idx * stride[0]) + offset * (stride[0] - 1) + y_ctr = (h_idx * stride[1]) + offset * (stride[1] - 1) + idx = 0 + for r in range(len(aspect_ratios)): + ar = aspect_ratios[r] + for s in range(len(anchor_sizes)): + anchor_size = anchor_sizes[s] + area = stride[0] * stride[1] + area_ratios = area / ar + base_w = np.round(np.sqrt(area_ratios)) + base_h = np.round(base_w * ar) + scale_w = anchor_size / stride[0] + scale_h = anchor_size / stride[1] + w = scale_w * base_w + h = scale_h * base_h + out_anchors[h_idx, w_idx, idx, :] = [ + (x_ctr - 0.5 * (w - 1)), (y_ctr - 0.5 * (h - 1)), + (x_ctr + 0.5 * (w - 1)), (y_ctr + 0.5 * (h - 1)) + ] + idx += 1 + + # set the variance. + out_var = np.tile(variances, (layer_h, layer_w, num_anchors, 1)) + out_anchors = out_anchors.astype('float32') + out_var = out_var.astype('float32') + return out_anchors, out_var + + +class TestAnchorGeneratorOp(OpTest): + def set_data(self): + self.init_test_params() + self.init_test_input() + self.init_test_output() + self.inputs = {'Input': self.input} + + self.attrs = { + 'anchor_sizes': self.anchor_sizes, + 'aspect_ratios': self.aspect_ratios, + 'stride': self.stride, + 'offset': self.offset, + 'variances': self.variances, + } + + self.outputs = {'Anchors': self.out_anchors, 'Variances': self.out_var} + + def test_check_output(self): + self.check_output() + + def setUp(self): + self.op_type = "anchor_generator" + self.set_data() + + def init_test_params(self): + self.batch_size = 1 + self.input_channels = 2 + self.layer_h = 2 + self.layer_w = 2 + + self.anchor_sizes = [64., 128., 256., 512.] + self.aspect_ratios = [0.5, 1., 2.] + self.stride = [16., 16.] + + self.offset = 0.5 + + self.variances = [0.1, 0.1, 0.2, 0.2] + + def init_test_input(self): + self.input = np.random.random( + (self.batch_size, self.input_channels, self.layer_h, + self.layer_w)).astype('float32') + + def init_test_output(self): + self.out_anchors, self.out_var = anchor_generator_in_python( + self.input, self.anchor_sizes, self.aspect_ratios, self.variances, + self.stride, self.offset) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_argsort_op.py b/python/paddle/fluid/tests/unittests/test_argsort_op.py new file mode 100644 index 0000000000..b29a102a38 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_argsort_op.py @@ -0,0 +1,56 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import numpy as np +from op_test import OpTest + + +class TestArgsortOp(OpTest): + def setUp(self): + self.init_axis() + x = np.random.random((2, 3, 4, 5, 10)).astype("float32") + if self.axis < 0: + self.axis = self.axis + len(x.shape) + self.indices = np.argsort(x, kind='quicksort', axis=self.axis) + self.out = np.sort(x, kind='quicksort', axis=self.axis) + self.op_type = "argsort" + self.inputs = {'X': x} + self.attrs = {'axis': self.axis} + self.outputs = {'Indices': self.indices, 'Out': self.out} + + def init_axis(self): + self.axis = -1 + + def test_check_output(self): + self.check_output() + + +class TestArgsortOpAxis0(TestArgsortOp): + def init_axis(self): + self.axis = 0 + + +class TestArgsortOpAxis1(TestArgsortOp): + def init_axis(self): + self.axis = 1 + + +class TestArgsortOpAxisNeg2(TestArgsortOp): + def init_axis(self): + self.axis = -2 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py index f6097d4b84..18fa546159 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_mkldnn_op.py @@ -52,5 +52,17 @@ class TestMKLDNNBatchNormOpInference(TestBatchNormOpInference): self.check_with_place(place, data_format, self.dtype, [2, 3, 4, 5]) +class TestMKLDNNBatchNormOpWithReluInference(TestBatchNormOpInference): + def init_kernel_type(self): + self.use_mkldnn = True + self.fuse_with_relu = True + + def test_check_output(self): + place = core.CPUPlace() + data_format = "NCHW" + + self.check_with_place(place, data_format, self.dtype, [2, 3, 4, 5]) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index 4216d83653..a62ee9596d 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -128,7 +128,7 @@ def create_or_get_tensor(scope, var_name, var, place): tensor = scope.var(var_name).get_tensor() if var is not None: assert isinstance(var, np.ndarray) - tensor.set_lod([[]]) + tensor.set_recursive_sequence_lengths([]) tensor.set_dims(var.shape) tensor.set(var, place) return tensor @@ -159,6 +159,7 @@ class TestBatchNormOpInference(unittest.TestCase): def setUp(self): self.dtype = np.float32 self.use_mkldnn = False + self.fuse_with_relu = False self.init_kernel_type() def __assert_close(self, tensor, np_array, msg, atol=1e-4): @@ -180,6 +181,8 @@ class TestBatchNormOpInference(unittest.TestCase): scale_shape = [c] x_val = np.random.random_sample(x_shape).astype(dtype) + # generate some negative values to test case with relu fused + x_val = x_val - 0.5 scale_val = np.random.random_sample(scale_shape).astype(np.float32) bias_val = np.random.random_sample(scale_shape).astype(np.float32) @@ -188,6 +191,8 @@ class TestBatchNormOpInference(unittest.TestCase): y_out = _reference_testing(x_val, scale_val, bias_val, mean, variance, epsilon, data_layout).astype(dtype) + if self.fuse_with_relu: + y_out = np.maximum(y_out, 0) scope = core.Scope() @@ -233,6 +238,7 @@ class TestBatchNormOpInference(unittest.TestCase): is_test=True, data_layout=data_layout, use_mkldnn=self.use_mkldnn, + fuse_with_relu=self.fuse_with_relu, epsilon=epsilon) batch_norm_op.run(scope, place) @@ -265,6 +271,7 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): def setUp(self): self.dtype = np.float16 self.use_mkldnn = False + self.fuse_with_relu = False self.init_kernel_type() def test_check_output(self): @@ -284,6 +291,7 @@ class TestFP16BatchNormOpInference(TestBatchNormOpInference): class TestBatchNormOpTraining(unittest.TestCase): def setUp(self): self.use_mkldnn = False + self.fuse_with_relu = False self.data_formats = ["NCHW", "NHWC"] self.init_kernel_type() @@ -367,7 +375,8 @@ class TestBatchNormOpTraining(unittest.TestCase): "epsilon": epsilon, "is_test": False, "data_layout": data_layout, - "use_mkldnn": self.use_mkldnn + "use_mkldnn": self.use_mkldnn, + "fuse_with_relu": self.fuse_with_relu }) block.create_var(name='y@GRAD', dtype='float32', shape=y.shape) diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py index 7976dd7c3f..db5771f7b0 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_decode_op.py @@ -20,6 +20,8 @@ from paddle.fluid.op import Operator class TestBeamSearchDecodeOp(unittest.TestCase): + """unittest of beam_search_decode_op""" + def setUp(self): self.scope = core.Scope() self.place = core.CPUPlace() @@ -32,32 +34,44 @@ class TestBeamSearchDecodeOp(unittest.TestCase): def test_get_set(self): ids = self.scope.var("ids").get_lod_tensor_array() - self.append_lod_tensor( - ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], - np.array( - [1, 2, 3, 4, 5, 6], dtype="int64")) - self.append_lod_tensor( - ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], - np.array( - [0, 1, 2, 3, 4, 5], dtype="int64")) - self.append_lod_tensor( - ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], - np.array( - [0, 1, 2, 3, 4], dtype="int64")) - scores = self.scope.var("scores").get_lod_tensor_array() - self.append_lod_tensor( - scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], - np.array( - [1, 2, 3, 4, 5, 6], dtype="float64")) - self.append_lod_tensor( - scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], - np.array( - [0, 1, 2, 3, 4, 5], dtype="float64")) - self.append_lod_tensor( - scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], - np.array( - [0, 1, 2, 3, 4], dtype="float64")) + # Construct sample data with 5 steps and 2 source sentences + # beam_size = 2, end_id = 1 + # start with start_id + [ + self.append_lod_tensor( + array, [[0, 1, 2], [0, 1, 2]], np.array( + [0, 0], dtype=dtype)) + for array, dtype in ((ids, "int64"), (scores, "float32")) + ] + [ + self.append_lod_tensor( + array, [[0, 1, 2], [0, 2, 4]], + np.array( + [2, 3, 4, 5], dtype=dtype)) + for array, dtype in ((ids, "int64"), (scores, "float32")) + ] + [ + self.append_lod_tensor( + array, [[0, 2, 4], [0, 2, 2, 4, 4]], + np.array( + [3, 1, 5, 4], dtype=dtype)) + for array, dtype in ((ids, "int64"), (scores, "float32")) + ] + [ + self.append_lod_tensor( + array, [[0, 2, 4], [0, 1, 2, 3, 4]], + np.array( + [1, 1, 3, 5], dtype=dtype)) + for array, dtype in ((ids, "int64"), (scores, "float32")) + ] + [ + self.append_lod_tensor( + array, [[0, 2, 4], [0, 0, 0, 2, 2]], + np.array( + [5, 1], dtype=dtype)) + for array, dtype in ((ids, "int64"), (scores, "float32")) + ] sentence_ids = self.scope.var("sentence_ids").get_tensor() sentence_scores = self.scope.var("sentence_scores").get_tensor() @@ -69,16 +83,18 @@ class TestBeamSearchDecodeOp(unittest.TestCase): Scores="scores", # outputs SentenceIds="sentence_ids", - SentenceScores="sentence_scores") + SentenceScores="sentence_scores", + beam_size=2, + end_id=1, ) beam_search_decode_op.run(self.scope, self.place) - expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] + expected_lod = [[0, 2, 4], [0, 4, 7, 12, 17]] self.assertEqual(sentence_ids.lod(), expected_lod) self.assertEqual(sentence_scores.lod(), expected_lod) expected_data = np.array( - [2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64") + [0, 2, 3, 1, 0, 2, 1, 0, 4, 5, 3, 5, 0, 4, 5, 3, 1], "int64") self.assertTrue(np.array_equal(np.array(sentence_ids), expected_data)) self.assertTrue( np.array_equal(np.array(sentence_scores), expected_data)) diff --git a/python/paddle/fluid/tests/unittests/test_beam_search_op.py b/python/paddle/fluid/tests/unittests/test_beam_search_op.py index bc708f3aff..167451edd8 100644 --- a/python/paddle/fluid/tests/unittests/test_beam_search_op.py +++ b/python/paddle/fluid/tests/unittests/test_beam_search_op.py @@ -26,9 +26,12 @@ def create_tensor(scope, name, np_data): class BeamSearchOpTester(unittest.TestCase): + """unittest of beam_search_op""" + def setUp(self): self.scope = core.Scope() self._create_ids() + self._create_pre_scores() self._create_scores() self._create_pre_ids() self.scope.var('selected_ids') @@ -37,7 +40,8 @@ class BeamSearchOpTester(unittest.TestCase): def test_run(self): op = Operator( 'beam_search', - pre_ids="pre_ids", + pre_ids='pre_ids', + pre_scores='pre_scores', ids='ids', scores='scores', selected_ids='selected_ids', @@ -47,15 +51,27 @@ class BeamSearchOpTester(unittest.TestCase): end_id=0, ) op.run(self.scope, core.CPUPlace()) selected_ids = self.scope.find_var("selected_ids").get_tensor() - print 'selected_ids', np.array(selected_ids) - print 'lod', selected_ids.lod() + selected_scores = self.scope.find_var("selected_scores").get_tensor() + self.assertTrue( + np.allclose( + np.array(selected_ids), np.array([4, 2, 3, 8])[:, np.newaxis])) + self.assertTrue( + np.allclose( + np.array(selected_scores), + np.array([0.5, 0.6, 0.9, 0.7])[:, np.newaxis])) + self.assertEqual(selected_ids.lod(), + [[0L, 2L, 4L], [0L, 1L, 2L, 3L, 4L]]) def _create_pre_ids(self): np_data = np.array([[1, 2, 3, 4]], dtype='int64') - tensor = create_tensor(self.scope, "pre_ids", np_data) + tensor = create_tensor(self.scope, 'pre_ids', np_data) + + def _create_pre_scores(self): + np_data = np.array([[0.1, 0.2, 0.3, 0.4]], dtype='float32') + tensor = create_tensor(self.scope, 'pre_scores', np_data) def _create_ids(self): - self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]] + self.lod = [[0, 2, 4], [0, 1, 2, 3, 4]] np_data = np.array( [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64') tensor = create_tensor(self.scope, "ids", np_data) diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py index 87c11e7880..b04f25ef87 100644 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np from op_test import OpTest +import paddle.fluid.core as core def bilinear_interp_np(input, out_h, out_w, out_size): @@ -45,9 +46,9 @@ def bilinear_interp_np(input, out_h, out_w, out_size): out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] + w1lambda*input[:, :, h, w+wid]) + \ - h1lambda*(w2lambda*input[:, :, h+hid, w] + - w1lambda*input[:, :, h+hid, w+wid]) - return out.astype("float32") + h1lambda*(w2lambda*input[:, :, h+hid, w] + + w1lambda*input[:, :, h+hid, w+wid]) + return out.astype(input.dtype) class TestBilinearInterpOp(OpTest): @@ -122,5 +123,44 @@ class TestCase6(TestBilinearInterpOp): self.out_size = np.array([65, 129]).astype("int32") +class TestBilinearInterpOpUint8(OpTest): + def setUp(self): + self.out_size = None + self.init_test_case() + self.op_type = "bilinear_interp" + input_np = np.random.randint( + low=0, high=256, size=self.input_shape).astype("uint8") + output_np = bilinear_interp_np(input_np, self.out_h, self.out_w, + self.out_size) + self.inputs = {'X': input_np} + if self.out_size is not None: + self.inputs['OutSize'] = self.out_size + self.attrs = {'out_h': self.out_h, 'out_w': self.out_w} + self.outputs = {'Out': output_np} + + def test_check_output(self): + self.check_output_with_place(place=core.CPUPlace(), atol=1) + + def init_test_case(self): + self.input_shape = [1, 3, 9, 6] + self.out_h = 10 + self.out_w = 9 + + +class TestCase1Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): + self.input_shape = [2, 3, 128, 64] + self.out_h = 120 + self.out_w = 50 + + +class TestCase2Uint8(TestBilinearInterpOpUint8): + def init_test_case(self): + self.input_shape = [4, 1, 7, 8] + self.out_h = 5 + self.out_w = 13 + self.out_size = np.array([6, 15]).astype("int32") + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py index f7461ee6da..d5bd726c4a 100644 --- a/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py +++ b/python/paddle/fluid/tests/unittests/test_bipartite_match_op.py @@ -65,23 +65,25 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None): distance (numpy.array) : The distance of two entries with shape [M, N]. lod (list of int): The offsets of each input in this batch. """ - n = len(lod) - 1 + n = len(lod) m = distance.shape[1] match_indices = -1 * np.ones((n, m), dtype=np.int) match_dist = np.zeros((n, m), dtype=np.float32) - for i in range(len(lod) - 1): - bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], - match_dist[i, :]) + cur_offset = 0 + for i in range(n): + bipartite_match(distance[cur_offset:(cur_offset + lod[i]), :], + match_indices[i, :], match_dist[i, :]) if match_type == 'per_prediction': - argmax_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], - match_dist[i, :], dist_threshold) + argmax_match(distance[cur_offset:(cur_offset + lod[i]), :], + match_indices[i, :], match_dist[i, :], dist_threshold) + cur_offset += lod[i] return match_indices, match_dist class TestBipartiteMatchOpWithLoD(OpTest): def setUp(self): self.op_type = 'bipartite_match' - lod = [[0, 5, 11, 23]] + lod = [[5, 6, 12]] dist = np.random.random((23, 217)).astype('float32') match_indices, match_dist = batch_bipartite_match(dist, lod[0]) @@ -98,7 +100,7 @@ class TestBipartiteMatchOpWithLoD(OpTest): class TestBipartiteMatchOpWithoutLoD(OpTest): def setUp(self): self.op_type = 'bipartite_match' - lod = [[0, 8]] + lod = [[8]] dist = np.random.random((8, 17)).astype('float32') match_indices, match_dist = batch_bipartite_match(dist, lod[0]) @@ -112,10 +114,27 @@ class TestBipartiteMatchOpWithoutLoD(OpTest): self.check_output() +class TestBipartiteMatchOpWithoutLoDLargeScaleInput(OpTest): + def setUp(self): + self.op_type = 'bipartite_match' + lod = [[300]] + dist = np.random.random((300, 17)).astype('float32') + match_indices, match_dist = batch_bipartite_match(dist, lod[0]) + + self.inputs = {'DistMat': dist} + self.outputs = { + 'ColToRowMatchIndices': match_indices, + 'ColToRowMatchDist': match_dist, + } + + def test_check_output(self): + self.check_output() + + class TestBipartiteMatchOpWithPerPredictionType(OpTest): def setUp(self): self.op_type = 'bipartite_match' - lod = [[0, 5, 11, 23]] + lod = [[5, 6, 12]] dist = np.random.random((23, 237)).astype('float32') match_indices, match_dist = batch_bipartite_match(dist, lod[0], 'per_prediction', 0.5) diff --git a/python/paddle/fluid/tests/unittests/test_box_coder_op.py b/python/paddle/fluid/tests/unittests/test_box_coder_op.py index b4c48d85f2..4ce9a4783e 100644 --- a/python/paddle/fluid/tests/unittests/test_box_coder_op.py +++ b/python/paddle/fluid/tests/unittests/test_box_coder_op.py @@ -81,15 +81,19 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type, n = target_box.shape[0] m = prior_box.shape[0] output_box = np.zeros((n, m, 4), dtype=np.float32) - for i in range(len(lod) - 1): + cur_offset = 0 + for i in range(len(lod)): if (code_type == "EncodeCenterSize"): - box_coder(target_box[lod[i]:lod[i + 1], :], prior_box, - prior_box_var, output_box[lod[i]:lod[i + 1], :, :], + box_coder(target_box[cur_offset:(cur_offset + lod[i]), :], + prior_box, prior_box_var, + output_box[cur_offset:(cur_offset + lod[i]), :, :], code_type, box_normalized) elif (code_type == "DecodeCenterSize"): - box_coder(target_box[lod[i]:lod[i + 1], :, :], prior_box, - prior_box_var, output_box[lod[i]:lod[i + 1], :, :], + box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :], + prior_box, prior_box_var, + output_box[cur_offset:(cur_offset + lod[i]), :, :], code_type, box_normalized) + cur_offset += lod[i] return output_box @@ -99,7 +103,7 @@ class TestBoxCoderOp(OpTest): def setUp(self): self.op_type = "box_coder" - lod = [[0, 1, 2, 3, 4, 5]] + lod = [[1, 1, 1, 1, 1]] prior_box = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32') target_box = np.random.random((5, 10, 4)).astype('float32') @@ -152,7 +156,7 @@ class TestBoxCoderOpWithLoD(OpTest): def setUp(self): self.op_type = "box_coder" - lod = [[0, 4, 12, 20]] + lod = [[4, 8, 8]] prior_box = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32') target_box = np.random.random((20, 4)).astype('float32') diff --git a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py index 050df2801c..23932194f0 100644 --- a/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py +++ b/python/paddle/fluid/tests/unittests/test_chunk_eval_op.py @@ -144,10 +144,10 @@ class TestChunkEvalOp(OpTest): starts = sorted(starts) self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks( infer, label, starts) - self.inputs = { - 'Inference': (infer, [starts]), - 'Label': (label, [starts]) - } + lod = [] + for i in range(len(starts) - 1): + lod.append(starts[i + 1] - starts[i]) + self.inputs = {'Inference': (infer, [lod]), 'Label': (label, [lod])} precision = float( self.num_correct_chunks ) / self.num_infer_chunks if self.num_infer_chunks else 0 diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 1e00d67d54..e9f3c45dc4 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -43,7 +43,7 @@ class TestConcatOp(OpTest): self.axis = 1 -class TestConcatOp2(OpTest): +class TestConcatOp2(TestConcatOp): def init_test_data(self): self.x0 = np.random.random((2, 3, 4, 5)).astype('float32') self.x1 = np.random.random((2, 3, 4, 5)).astype('float32') @@ -51,5 +51,16 @@ class TestConcatOp2(OpTest): self.axis = 1 +class TestConcatOp3(TestConcatOp): + def init_test_data(self): + self.x0 = np.random.random((1, 256, 170, 256)).astype('float32') + self.x1 = np.random.random((1, 128, 170, 256)).astype('float32') + self.x2 = np.random.random((1, 128, 170, 256)).astype('float32') + self.axis = 1 + + def test_check_grad(self): + pass + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index ded2f13028..07545e7feb 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -242,6 +242,19 @@ class TestCUDNNWithGroups(TestWithGroups): self.op_type = "conv2d_transpose" +class TestDepthwiseConvTranspose(TestConv2dTransposeOp): + def init_test_case(self): + self.pad = [1, 1] + self.stride = [2, 2] + self.dilations = [1, 1] + self.input_size = [2, 8, 16, 16] # NCHW + self.groups = 8 + assert np.mod(self.input_size[1], self.groups) == 0 + f_c = self.input_size[1] / self.groups + self.filter_size = [self.input_size[1], f_c, 4, 4] + self.op_type = "depthwise_conv2d_transpose" + + # Please Don't remove the following code. # Currently, CI use cudnn V5.0 which not support dilation conv. # class TestCUDNNWithDilation(TestWithDilation): diff --git a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py index f397f542bb..122b076c2d 100644 --- a/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py +++ b/python/paddle/fluid/tests/unittests/test_crf_decoding_op.py @@ -22,9 +22,9 @@ from op_test import OpTest class CRFDecoding(object): def __init__(self, emission_weights, transition_weights, seq_start_positions): - assert (emission_weights.shape[0] == seq_start_positions[-1]) + assert (emission_weights.shape[0] == sum(seq_start_positions)) self.tag_num = emission_weights.shape[1] - self.seq_num = len(seq_start_positions) - 1 + self.seq_num = len(seq_start_positions) self.seq_start_positions = seq_start_positions self.x = emission_weights @@ -34,9 +34,9 @@ class CRFDecoding(object): self.w = transition_weights[2:, :] self.track = np.zeros( - (seq_start_positions[-1], self.tag_num), dtype="int64") + (sum(seq_start_positions), self.tag_num), dtype="int64") self.decoded_path = np.zeros( - (seq_start_positions[-1], 1), dtype="int64") + (sum(seq_start_positions), 1), dtype="int64") def _decode_one_sequence(self, decoded_path, x): seq_len, tag_num = x.shape @@ -71,9 +71,11 @@ class CRFDecoding(object): decoded_path[i - 1] = max_idx = track[i, max_idx] def decode(self): + cur_pos = 0 for i in range(self.seq_num): - start = self.seq_start_positions[i] - end = self.seq_start_positions[i + 1] + start = cur_pos + cur_pos += self.seq_start_positions[i] + end = cur_pos self._decode_one_sequence(self.decoded_path[start:end, :], self.x[start:end, :]) return self.decoded_path @@ -90,11 +92,13 @@ class TestCRFDecodingOp1(OpTest): TAG_NUM = 17 MAX_SEQ_LEN = 10 - lod = [[0]] + lod = [[]] + total_len = 0 for i in range(SEQ_NUM): - lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN)) + lod[-1].append(random.randint(1, MAX_SEQ_LEN)) + total_len += lod[-1][-1] emission = np.random.uniform(-1, 1, - [lod[-1][-1], TAG_NUM]).astype("float64") + [total_len, TAG_NUM]).astype("float64") transition = np.random.uniform(-0.5, 0.5, [TAG_NUM + 2, TAG_NUM]).astype("float64") @@ -126,7 +130,8 @@ class TestCRFDecodingOp2(OpTest): self.op_type = "crf_decoding" TAG_NUM = 5 - lod = [[0, 1, 3, 6, 10]] + lod = [[1, 2, 3, 4]] + total_len = sum(lod[-1]) transition = np.repeat( np.arange( TAG_NUM, dtype="float64").reshape(1, TAG_NUM), @@ -135,13 +140,13 @@ class TestCRFDecodingOp2(OpTest): emission = np.repeat( np.arange( TAG_NUM, dtype="float64").reshape(1, TAG_NUM), - lod[-1][-1], + total_len, axis=0) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") + low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64") predicted_labels = np.ones( - (lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1) + (total_len, 1), dtype="int64") * (TAG_NUM - 1) expected_output = (labels == predicted_labels).astype("int64") self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/test_ctc_align.py b/python/paddle/fluid/tests/unittests/test_ctc_align.py index f166031a1c..131b4076f4 100644 --- a/python/paddle/fluid/tests/unittests/test_ctc_align.py +++ b/python/paddle/fluid/tests/unittests/test_ctc_align.py @@ -22,14 +22,16 @@ from test_softmax_op import stable_softmax def CTCAlign(input, lod, blank, merge_repeated): lod0 = lod[0] result = [] - for i in range(len(lod0) - 1): + cur_offset = 0 + for i in range(len(lod0)): prev_token = -1 - for j in range(lod0[i], lod0[i + 1]): + for j in range(cur_offset, cur_offset + lod0[i]): token = input[j][0] if (token != blank) and not (merge_repeated and token == prev_token): result.append(token) prev_token = token + cur_offset += lod0[i] result = np.array(result).reshape([len(result), 1]).astype("int32") if len(result) == 0: result = np.array([-1]) @@ -39,7 +41,7 @@ def CTCAlign(input, lod, blank, merge_repeated): class TestCTCAlignOp(OpTest): def config(self): self.op_type = "ctc_align" - self.input_lod = [[0, 11, 18]] + self.input_lod = [[11, 7]] self.blank = 0 self.merge_repeated = False self.input = np.array( @@ -66,7 +68,7 @@ class TestCTCAlignOp(OpTest): class TestCTCAlignOpCase1(TestCTCAlignOp): def config(self): self.op_type = "ctc_align" - self.input_lod = [[0, 11, 19]] + self.input_lod = [[11, 8]] self.blank = 0 self.merge_repeated = True self.input = np.array( @@ -77,7 +79,7 @@ class TestCTCAlignOpCase1(TestCTCAlignOp): class TestCTCAlignOpCase2(TestCTCAlignOp): def config(self): self.op_type = "ctc_align" - self.input_lod = [[0, 4]] + self.input_lod = [[4]] self.blank = 0 self.merge_repeated = True self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32") diff --git a/python/paddle/fluid/tests/unittests/test_data_balance.py b/python/paddle/fluid/tests/unittests/test_data_balance.py new file mode 100644 index 0000000000..6d810920d5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_data_balance.py @@ -0,0 +1,192 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import paddle.fluid as fluid +import paddle.v2 as paddle +import numpy as np + + +class TestDataBalance(unittest.TestCase): + def prepare_data(self): + def fake_data_generator(): + for n in xrange(self.total_ins_num): + yield np.ones((3, 4)) * n, n + + # Prepare data + with fluid.program_guard(fluid.Program(), fluid.Program()): + reader = paddle.batch( + fake_data_generator, batch_size=self.batch_size) + feeder = fluid.DataFeeder( + feed_list=[ + fluid.layers.data( + name='image', shape=[3, 4], dtype='float32'), + fluid.layers.data( + name='label', shape=[1], dtype='int64'), + ], + place=fluid.CPUPlace()) + self.num_batches = fluid.recordio_writer.convert_reader_to_recordio_file( + self.data_file_name, reader, feeder) + + def prepare_lod_data(self): + def fake_data_generator(): + for n in xrange(1, self.total_ins_num + 1): + d1 = (np.ones((n, 3)) * n).astype('float32') + d2 = (np.array(n).reshape((1, 1))).astype('int32') + yield d1, d2 + + # Prepare lod data + with fluid.program_guard(fluid.Program(), fluid.Program()): + with fluid.recordio_writer.create_recordio_writer( + filename=self.lod_data_file_name) as writer: + eof = False + generator = fake_data_generator() + while (not eof): + data_batch = [ + np.array([]).reshape((0, 3)), np.array([]).reshape( + (0, 1)) + ] + lod = [0] + for _ in xrange(self.batch_size): + try: + ins = generator.next() + except StopIteration: + eof = True + break + for i, d in enumerate(ins): + data_batch[i] = np.concatenate( + (data_batch[i], d), axis=0) + lod.append(lod[-1] + ins[0].shape[0]) + if data_batch[0].shape[0] > 0: + for i, d in enumerate(data_batch): + t = fluid.LoDTensor() + t.set(data_batch[i], fluid.CPUPlace()) + if i == 0: + t.set_lod([lod]) + writer.append_tensor(t) + writer.complete_append_tensor() + + def setUp(self): + self.use_cuda = fluid.core.is_compiled_with_cuda() + self.data_file_name = './data_balance_test.recordio' + self.lod_data_file_name = './data_balance_with_lod_test.recordio' + self.total_ins_num = 50 + self.batch_size = 10 + self.prepare_data() + self.prepare_lod_data() + + def main(self): + main_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(main_prog, startup_prog): + data_reader = fluid.layers.io.open_files( + filenames=[self.data_file_name], + shapes=[[-1, 3, 4], [-1, 1]], + lod_levels=[0, 0], + dtypes=['float32', 'int64']) + if self.use_cuda: + data_reader = fluid.layers.double_buffer(data_reader) + image, label = fluid.layers.read_file(data_reader) + + place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + build_strategy = fluid.BuildStrategy() + build_strategy.enable_data_balance = True + parallel_exe = fluid.ParallelExecutor( + use_cuda=self.use_cuda, + main_program=main_prog, + build_strategy=build_strategy) + + if (parallel_exe.device_count > self.batch_size): + print("WARNING: Unittest TestDataBalance skipped. \ + For the result is not correct when device count \ + is larger than batch size.") + exit(0) + fetch_list = [image.name, label.name] + + data_appeared = [False] * self.total_ins_num + while (True): + try: + image_val, label_val = parallel_exe.run(fetch_list, + return_numpy=True) + except fluid.core.EOFException: + break + ins_num = image_val.shape[0] + broadcasted_label = np.ones( + (ins_num, 3, 4)) * label_val.reshape((ins_num, 1, 1)) + self.assertEqual(image_val.all(), broadcasted_label.all()) + for l in label_val: + self.assertFalse(data_appeared[l[0]]) + data_appeared[l[0]] = True + for i in data_appeared: + self.assertTrue(i) + + def main_lod(self): + main_prog = fluid.Program() + startup_prog = fluid.Program() + with fluid.program_guard(main_prog, startup_prog): + data_reader = fluid.layers.io.open_files( + filenames=[self.lod_data_file_name], + shapes=[[-1, 3], [-1, 1]], + lod_levels=[1, 0], + dtypes=['float32', 'int32'], + thread_num=1) + ins, label = fluid.layers.read_file(data_reader) + + place = fluid.CUDAPlace(0) if self.use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + build_strategy = fluid.BuildStrategy() + build_strategy.enable_data_balance = True + parallel_exe = fluid.ParallelExecutor( + use_cuda=self.use_cuda, + main_program=main_prog, + build_strategy=build_strategy) + + if (parallel_exe.device_count > self.batch_size): + print("WARNING: Unittest TestDataBalance skipped. \ + For the result is not correct when device count \ + is larger than batch size.") + exit(0) + fetch_list = [ins.name, label.name] + + data_appeared = [False] * self.total_ins_num + while (True): + try: + ins_tensor, label_tensor = parallel_exe.run( + fetch_list, return_numpy=False) + except fluid.core.EOFException: + break + + ins_val = np.array(ins_tensor) + label_val = np.array(label_tensor) + ins_lod = ins_tensor.lod()[0] + self.assertEqual(ins_val.shape[1], 3) + self.assertEqual(label_val.shape[1], 1) + self.assertEqual(len(ins_lod) - 1, label_val.shape[0]) + for i in range(0, len(ins_lod) - 1): + ins_elem = ins_val[ins_lod[i]:ins_lod[i + 1]][:] + label_elem = label_val[i][0] + self.assertEqual(ins_elem.all(), label_elem.all()) + self.assertFalse(data_appeared[int(label_elem - 1)]) + data_appeared[int(label_elem - 1)] = True + + for i in data_appeared: + self.assertTrue(i) + + def test_all(self): + self.main() + self.main_lod() diff --git a/python/paddle/fluid/tests/unittests/test_detection_map_op.py b/python/paddle/fluid/tests/unittests/test_detection_map_op.py index f545ad155c..05d3367ad8 100644 --- a/python/paddle/fluid/tests/unittests/test_detection_map_op.py +++ b/python/paddle/fluid/tests/unittests/test_detection_map_op.py @@ -74,13 +74,13 @@ class TestDetectionMAPOp(OpTest): self.evaluate_difficult = True self.ap_type = "integral" - self.label_lod = [[0, 2, 4]] + self.label_lod = [[2, 2]] # label difficult xmin ymin xmax ymax self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8], [2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]] # label score xmin ymin xmax ymax difficult - self.detect_lod = [[0, 3, 7]] + self.detect_lod = [[3, 4]] self.detect = [ [1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3], [1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4], @@ -89,7 +89,7 @@ class TestDetectionMAPOp(OpTest): ] # label score true_pos false_pos - self.tf_pos_lod = [[0, 3, 7]] + self.tf_pos_lod = [[3, 4]] self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] @@ -112,15 +112,19 @@ class TestDetectionMAPOp(OpTest): for i, count in enumerate(class_pos_count): class_pos_count_dict[i] = count - for i in range(len(true_pos_lod[0]) - 1): - start = true_pos_lod[0][i] - end = true_pos_lod[0][i + 1] + cur_pos = 0 + for i in range(len(true_pos_lod[0])): + start = cur_pos + cur_pos += true_pos_lod[0][i] + end = cur_pos for j in range(start, end): true_pos_dict[i].append(true_pos[j]) - for i in range(len(false_pos_lod[0]) - 1): - start = false_pos_lod[0][i] - end = false_pos_lod[0][i + 1] + cur_pos = 0 + for i in range(len(false_pos_lod[0])): + start = cur_pos + cur_pos += false_pos_lod[0][i] + end = cur_pos for j in range(start, end): false_pos_dict[i].append(false_pos[j]) @@ -130,19 +134,19 @@ class TestDetectionMAPOp(OpTest): label_number = self.class_num out_class_pos_count = [] - out_true_pos_lod = [0] + out_true_pos_lod = [] out_true_pos = [] - out_false_pos_lod = [0] + out_false_pos_lod = [] out_false_pos = [] for i in range(label_number): out_class_pos_count.append([label_count[i]]) true_pos_list = true_pos[i] out_true_pos += true_pos_list - out_true_pos_lod.append(len(out_true_pos)) + out_true_pos_lod.append(len(true_pos_list)) false_pos_list = false_pos[i] out_false_pos += false_pos_list - out_false_pos_lod.append(len(out_false_pos)) + out_false_pos_lod.append(len(false_pos_list)) return out_class_pos_count, out_true_pos, [ out_true_pos_lod @@ -241,7 +245,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): self.evaluate_difficult = False - self.tf_pos_lod = [[0, 2, 6]] + self.tf_pos_lod = [[2, 4]] # label score true_pos false_pos self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] @@ -267,9 +271,9 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp): def init_test_case(self): super(TestDetectionMAPOpMultiBatch, self).init_test_case() self.class_pos_count = [0, 2, 1] - self.true_pos_lod = [[0, 0, 3, 5]] + self.true_pos_lod = [[0, 3, 2]] self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]] - self.false_pos_lod = [[0, 0, 3, 5]] + self.false_pos_lod = [[0, 3, 2]] self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]] diff --git a/python/paddle/fluid/tests/unittests/test_dist_mnist.py b/python/paddle/fluid/tests/unittests/test_dist_mnist.py new file mode 100644 index 0000000000..ad2d57f7c5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_mnist.py @@ -0,0 +1,210 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse +import time +import math + +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +from paddle.fluid import core +import unittest +from multiprocessing import Process +import os +import signal + +SEED = 1 +DTYPE = "float32" +paddle.dataset.mnist.fetch() + + +# random seed must set before configuring the network. +# fluid.default_startup_program().random_seed = SEED +def cnn_model(data): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=data, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + + # TODO(dzhwinter) : refine the initializer and random seed settting + SIZE = 10 + input_shape = conv_pool_2.shape + param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE] + scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5 + + predict = fluid.layers.fc( + input=conv_pool_2, + size=SIZE, + act="softmax", + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer( + loc=0.0, scale=scale))) + return predict + + +def get_model(batch_size): + # Input data + images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE) + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + # Train program + predict = cnn_model(images) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.mean(x=cost) + + # Evaluator + batch_size_tensor = fluid.layers.create_tensor(dtype='int64') + batch_acc = fluid.layers.accuracy( + input=predict, label=label, total=batch_size_tensor) + + inference_program = fluid.default_main_program().clone() + # Optimization + opt = fluid.optimizer.AdamOptimizer( + learning_rate=0.001, beta1=0.9, beta2=0.999) + + # Reader + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=batch_size) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=batch_size) + opt.minimize(avg_cost) + return inference_program, avg_cost, train_reader, test_reader, batch_acc, predict + + +def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id=trainer_id, + program=main_program, + pservers=pserver_endpoints, + trainers=trainers) + return t + + +def run_pserver(pserver_endpoints, trainers, current_endpoint): + get_model(batch_size=20) + t = get_transpiler(0, + fluid.default_main_program(), pserver_endpoints, + trainers) + pserver_prog = t.get_pserver_program(current_endpoint) + startup_prog = t.get_startup_program(current_endpoint, pserver_prog) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + exe.run(pserver_prog) + + +class TestDistMnist(unittest.TestCase): + def setUp(self): + self._trainers = 1 + self._pservers = 1 + self._ps_endpoints = "127.0.0.1:9123" + + def start_pserver(self, endpoint): + p = Process( + target=run_pserver, + args=(self._ps_endpoints, self._trainers, endpoint)) + p.start() + return p.pid + + def _wait_ps_ready(self, pid): + retry_times = 5 + while True: + assert retry_times >= 0, "wait ps ready failed" + time.sleep(1) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. + os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error: + retry_times -= 1 + + def stop_pserver(self, pid): + os.kill(pid, signal.SIGTERM) + + def test_with_place(self): + p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + pserver_pid = self.start_pserver(self._ps_endpoints) + self._wait_ps_ready(pserver_pid) + + self.run_trainer(p, 0) + + self.stop_pserver(pserver_pid) + + def run_trainer(self, place, trainer_id): + test_program, avg_cost, train_reader, test_reader, batch_acc, predict = get_model( + batch_size=20) + t = get_transpiler(trainer_id, + fluid.default_main_program(), self._ps_endpoints, + self._trainers) + + trainer_prog = t.get_trainer_program() + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + feed_var_list = [ + var for var in trainer_prog.global_block().vars.itervalues() + if var.is_data + ] + + feeder = fluid.DataFeeder(feed_var_list, place) + for pass_id in xrange(10): + for batch_id, data in enumerate(train_reader()): + exe.run(trainer_prog, feed=feeder.feed(data)) + + if (batch_id + 1) % 10 == 0: + acc_set = [] + avg_loss_set = [] + for test_data in test_reader(): + acc_np, avg_loss_np = exe.run( + program=test_program, + feed=feeder.feed(test_data), + fetch_list=[batch_acc, avg_cost]) + acc_set.append(float(acc_np)) + avg_loss_set.append(float(avg_loss_np)) + # get test acc and loss + acc_val = np.array(acc_set).mean() + avg_loss_val = np.array(avg_loss_set).mean() + if float(acc_val + ) > 0.8: # Smaller value to increase CI speed + return + else: + print( + 'PassID {0:1}, BatchID {1:04}, Test Loss {2:2.2}, Acc {3:2.2}'. + format(pass_id, batch_id + 1, + float(avg_loss_val), float(acc_val))) + if math.isnan(float(avg_loss_val)): + assert ("got Nan loss, training failed.") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 2314bb2ed8..562e66b062 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -16,6 +16,7 @@ import os import time import unittest from multiprocessing import Process +import signal import numpy @@ -24,9 +25,6 @@ import paddle.fluid.layers as layers class TestSendOp(unittest.TestCase): - @unittest.skip( - "This test is buggy. We cannot use time.sleep to sync processes, the connection may fail in unittest." - ) def test_send(self): # Run init_serv in a thread place = fluid.CPUPlace() @@ -35,7 +33,9 @@ class TestSendOp(unittest.TestCase): p.daemon = True p.start() - time.sleep(10) + self.ps_timeout = 5 + self._wait_ps_ready(p.pid) + with open("/tmp/paddle.%d.port" % p.pid, "r") as fn: selected_port = int(fn.readlines()[0]) self.init_client(place, selected_port) @@ -44,9 +44,23 @@ class TestSendOp(unittest.TestCase): self.assertTrue(numpy.allclose(self.local_out, self.dist_out)) # FIXME(typhoonzero): find a way to gracefully shutdown the server. - os.system("kill -9 %d" % p.pid) + os.kill(p.pid, signal.SIGKILL) p.join() + def _wait_ps_ready(self, pid): + start_left_time = self.ps_timeout + sleep_time = 0.5 + while True: + assert start_left_time >= 0, "wait ps ready failed" + time.sleep(sleep_time) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. + os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error: + start_left_time -= sleep_time + def init_serv(self, place): main = fluid.Program() @@ -84,7 +98,10 @@ class TestSendOp(unittest.TestCase): dtype="float32", persistable=False, shape=[32, 32]) - o = layers.Send("127.0.0.1:%d" % port, [x], [get_var]) + fluid.initializer.Constant(value=2.3)(get_var, main.global_block()) + layers.Send("127.0.0.1:%d" % port, [x]) + o = layers.Recv("127.0.0.1:%d" % port, [get_var]) + exe = fluid.Executor(place) self.dist_out = exe.run(main, fetch_list=o) # o is a list diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index b4379ad447..75b4b4e50d 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -15,51 +15,248 @@ import unittest import paddle.fluid as fluid from paddle.fluid.transpiler.distribute_transpiler import delete_ops +import traceback -from transpiler_test import TranspilerTest - -class TestDistTranspiler(TranspilerTest): +class TranspilerTest(unittest.TestCase): def setUp(self): - self.current_pserver_ep = "127.0.0.1:6174" + self.trainer_id = 0 + self.trainers = 2 + self.pservers = 2 + # NOTE: we do not actually bind this port + self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175" + self.pserver1_ep = "127.0.0.1:6174" + self.pserver2_ep = "127.0.0.1:6175" + self.slice_var_up = True + self.sync_mode = True + self.transpiler = None + + def net_conf(self): + x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + y_predict = fluid.layers.fc(input=x, + size=1000, + act=None, + param_attr=fluid.ParamAttr(name='fc_w'), + bias_attr=fluid.ParamAttr(name='fc_b')) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) + sgd_optimizer.minimize(avg_cost) + return + + def get_main_program(self): + main = fluid.Program() + with fluid.program_guard(main): + self.net_conf() + self.origin_prog = main.clone() + return main + + def get_trainer(self): + t = self._transpiler_instance() + return t.get_trainer_program() + + def get_pserver(self, ep): + t = self._transpiler_instance() + pserver = t.get_pserver_program(ep) + startup = t.get_startup_program(ep, pserver) + return pserver, startup + + def _transpiler_instance(self): + if not self.transpiler: + main = self.get_main_program() + self.transpiler = fluid.DistributeTranspiler() + self.transpiler.transpile( + self.trainer_id, + program=main, + pservers=self.pserver_eps, + trainers=self.trainers, + slice_var_up=self.slice_var_up, + sync_mode=self.sync_mode) + return self.transpiler + +class TestBasicModel(TranspilerTest): def test_transpiler(self): + pserver, startup = self.get_pserver(self.pserver1_ep) + pserver2, startup2 = self.get_pserver(self.pserver2_ep) + trainer = self.get_trainer() - pserver, startup = self.get_pserver(self.current_pserver_ep) - self.assertEqual([op.type for op in trainer.global_block().ops], - self.get_expect_trainer_ops()) + + self.assertEqual([op.type for op in trainer.global_block().ops], [ + 'mul', 'elementwise_add', 'elementwise_sub', 'square', 'mean', + 'fill_constant', 'mean_grad', 'square_grad', 'elementwise_sub_grad', + 'elementwise_add_grad', 'send', 'mul_grad', 'split_byref', 'send', + 'send_barrier', 'recv', 'recv', 'fetch_barrier', 'concat' + ]) self.assertEqual(len(pserver.blocks), 3) # block0: listen_and_serv self.assertEqual([op.type for op in pserver.blocks[0].ops], ["listen_and_serv"]) - # block2: optimize pass + # block1~2: optimize pass self.assertEqual([op.type for op in pserver.blocks[1].ops], ["sum", "scale", "sgd"]) - # confirm startup program - - self.assertEqual([op.type for op in startup.global_block().ops], [ - "fill_constant", "fill_constant", "uniform_random", "uniform_random" - ]) - + self.assertEqual([op.type for op in startup.global_block().ops], + ["fill_constant", "fill_constant", "uniform_random"]) # the variable #fc_w will be split into two blocks fc_w_var = startup.global_block().var("fc_w.block1") self.assertEqual(fc_w_var.shape, (500, 1000)) + # all parameters should be optimized on pserver + + pserver_params = [] + for prog in [pserver, pserver2]: + for blk in prog.blocks: + for op in blk.ops: + if "Param" in op.input_names: + param_name = op.input("Param")[0] + is_block_idx = param_name.find(".block") + if is_block_idx != -1: + origin_param_name = param_name[:is_block_idx] + else: + origin_param_name = param_name + pserver_params.append(origin_param_name) + trainer_params = [] + for op in self.origin_prog.global_block().ops: + if "Param" in op.input_names: + trainer_params.append(op.input("Param")[0]) + self.assertEqual(set(pserver_params), set(trainer_params)) + + +class TestNoSliceVar(TranspilerTest): + def setUp(self): + super(TestNoSliceVar, self).setUp() + self.slice_var_up = False + + def test_transpiler(self): + _, startup = self.get_pserver(self.pserver1_ep) + _, startup2 = self.get_pserver(self.pserver2_ep) + + if startup.global_block().vars.has_key("fc_w"): + fc_w_var = startup.global_block().vars["fc_w"] + elif startup2.global_block().vars.has_key("fc_w"): + fc_w_var = startup2.global_block().vars["fc_w"] + + self.assertEqual(fc_w_var.shape, (1000, 1000)) - def get_expect_trainer_ops(self): - trainer = fluid.Program() - with fluid.program_guard(trainer): - optimize_ops, params_grads = self.net_conf() +class TestLRDecay(TranspilerTest): + def net_conf(self): + x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + y_predict = fluid.layers.fc(input=x, + size=1000, + act=None, + param_attr=fluid.ParamAttr(name='fc_w'), + bias_attr=fluid.ParamAttr(name='fc_b')) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.exponential_decay( + learning_rate=1.0, + decay_steps=2100, + decay_rate=0.1, + staircase=True)) + sgd_optimizer.minimize(avg_cost) + return + + def test_transpiler(self): + pserver, startup = self.get_pserver(self.pserver1_ep) + trainer = self.get_trainer() + + self.assertEqual(len(pserver.blocks), 4) + lr_decay_ops = [op.type for op in pserver.blocks[1].ops] + self.assertEqual(lr_decay_ops, [ + "increment", "cast", "fill_constant", "elementwise_div", "floor", + "fill_constant", "elementwise_pow", "fill_constant", + "elementwise_mul" + ]) + + +class TestLRDecayConditional(TranspilerTest): + def net_conf(self): + x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + y_predict = fluid.layers.fc(input=x, + size=1000, + act=None, + param_attr=fluid.ParamAttr(name='fc_w'), + bias_attr=fluid.ParamAttr(name='fc_b')) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + sgd_optimizer = fluid.optimizer.SGD( + learning_rate=fluid.layers.piecewise_decay([10000, 20000], + [1.0, 0.5, 1.0])) + sgd_optimizer.minimize(avg_cost) + return + + def test_transpiler(self): + pserver, startup = self.get_pserver(self.pserver1_ep) + trainer = self.get_trainer() + + serv_op = pserver.blocks[0].ops[0] + sub_blocks = [] + optimize_blocks = [] + for b in serv_op.attrs["optimize_blocks"]: + optimize_blocks.append(b.idx) + for b in pserver.blocks: + if b.idx not in optimize_blocks: + sub_blocks.append(b.idx) + + self.assertEqual(len(pserver.blocks), 7) + lr_decay_ops = [op.type for op in pserver.blocks[1].ops] + self.assertEqual(lr_decay_ops, [ + "increment", "cast", "fill_constant", "fill_constant", "less_than", + "logical_not", "conditional_block", "fill_constant", + "fill_constant", "less_than", "logical_not", "logical_and", + "logical_and", "conditional_block", "fill_constant", + "conditional_block" + ]) + # test the condition blocks + for b in sub_blocks: + if b == 0: + continue + block = pserver.blocks[b] + self.assertEqual([op.type for op in block.ops], ["assign"]) + + +class TestL2Decay(TranspilerTest): + def net_conf(self): + x = fluid.layers.data(name='x', shape=[1000], dtype='float32') + y_predict = fluid.layers.fc( + input=x, + size=1000, + act=None, + param_attr=fluid.ParamAttr( + name='fc_w', + regularizer=fluid.regularizer.L2Decay(), + gradient_clip=fluid.clip.GradientClipByValue(0.1)), + bias_attr=fluid.ParamAttr(name='fc_b')) + y = fluid.layers.data(name='y', shape=[1], dtype='float32') + cost = fluid.layers.square_error_cost(input=y_predict, label=y) + avg_cost = fluid.layers.mean(cost) + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) + sgd_optimizer.minimize(avg_cost) + return + + def test_transpiler(self): + pserver, startup = self.get_pserver(self.pserver1_ep) + trainer = self.get_trainer() + + self.assertEqual(len(pserver.blocks), 3) + self.assertEqual([op.type for op in pserver.blocks[1].ops], + ["sum", "scale", "clip", "sgd"]) + self.assertEqual( + [op.type for op in pserver.blocks[2].ops], + ["sum", "scale", "clip", "scale", "elementwise_add", "sgd"]) + # TODO(typhoonzero): test clipping and L2Decay ops are removed from trainer + - delete_ops(trainer.global_block(), optimize_ops) - ops = [op.type for op in trainer.global_block().ops] + [ - "split_byref", "send", "send_barrier", "recv", "recv", - "fetch_barrier", "concat" - ] - ops.insert(ops.index("elementwise_add_grad") + 1, "send") - return ops + # FIXME(typhoonzero): need to add test for async case: + # see https://github.com/PaddlePaddle/Paddle/issues/11691 +class TestAsyncSGD(TranspilerTest): + pass if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_dist_word2vec.py b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py new file mode 100644 index 0000000000..712fd5849d --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_word2vec.py @@ -0,0 +1,203 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +import argparse +import time +import math +import paddle +import paddle.fluid as fluid +import paddle.fluid.profiler as profiler +from paddle.fluid import core +import unittest +from multiprocessing import Process +import os +import signal + +IS_SPARSE = True +EMBED_SIZE = 32 +HIDDEN_SIZE = 256 +N = 5 +BATCH_SIZE = 32 +ExecutionStrategy = core.ParallelExecutor.ExecutionStrategy + + +def get_model(): + def __network__(words): + embed_first = fluid.layers.embedding( + input=words[0], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_second = fluid.layers.embedding( + input=words[1], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_third = fluid.layers.embedding( + input=words[2], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + embed_forth = fluid.layers.embedding( + input=words[3], + size=[dict_size, EMBED_SIZE], + dtype='float32', + is_sparse=IS_SPARSE, + param_attr='shared_w') + + concat_embed = fluid.layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], axis=1) + hidden1 = fluid.layers.fc(input=concat_embed, + size=HIDDEN_SIZE, + act='sigmoid') + predict_word = fluid.layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = fluid.layers.cross_entropy(input=predict_word, label=words[4]) + avg_cost = fluid.layers.mean(cost) + return avg_cost, predict_word + + word_dict = paddle.dataset.imikolov.build_dict() + dict_size = len(word_dict) + + first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64') + second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64') + third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = fluid.layers.data(name='forthw', shape=[1], dtype='int64') + next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64') + avg_cost, predict_word = __network__( + [first_word, second_word, third_word, forth_word, next_word]) + + inference_program = paddle.fluid.default_main_program().clone() + + sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) + sgd_optimizer.minimize(avg_cost) + + train_reader = paddle.batch( + paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE) + test_reader = paddle.batch( + paddle.dataset.imikolov.test(word_dict, N), BATCH_SIZE) + + return inference_program, avg_cost, train_reader, test_reader, predict_word + + +def get_transpiler(trainer_id, main_program, pserver_endpoints, trainers): + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id=trainer_id, + program=main_program, + pservers=pserver_endpoints, + trainers=trainers) + return t + + +def run_pserver(pserver_endpoints, trainers, current_endpoint): + get_model() + t = get_transpiler(0, + fluid.default_main_program(), pserver_endpoints, + trainers) + pserver_prog = t.get_pserver_program(current_endpoint) + startup_prog = t.get_startup_program(current_endpoint, pserver_prog) + + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + + exe.run(pserver_prog) + + +class TestDistMnist(unittest.TestCase): + def setUp(self): + self._trainers = 1 + self._pservers = 1 + self._ps_endpoints = "127.0.0.1:9123" + + def start_pserver(self, endpoint): + p = Process( + target=run_pserver, + args=(self._ps_endpoints, self._trainers, endpoint)) + p.start() + return p.pid + + def _wait_ps_ready(self, pid): + retry_times = 5 + while True: + assert retry_times >= 0, "wait ps ready failed" + time.sleep(1) + try: + # the listen_and_serv_op would touch a file which contains the listen port + # on the /tmp directory until it was ready to process all the RPC call. + os.stat("/tmp/paddle.%d.port" % pid) + return + except os.error: + retry_times -= 1 + + def stop_pserver(self, pid): + os.kill(pid, signal.SIGKILL) + + def test_with_place(self): + p = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + pserver_pid = self.start_pserver(self._ps_endpoints) + self._wait_ps_ready(pserver_pid) + + self.run_trainer(p, 0) + + self.stop_pserver(pserver_pid) + + def run_trainer(self, place, trainer_id): + test_program, avg_cost, train_reader, test_reader, predict = get_model() + t = get_transpiler(trainer_id, + fluid.default_main_program(), self._ps_endpoints, + self._trainers) + + trainer_prog = t.get_trainer_program() + + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + use_gpu = True if core.is_compiled_with_cuda() else False + + exec_strategy = ExecutionStrategy() + exec_strategy.use_cuda = use_gpu + train_exe = fluid.ParallelExecutor( + use_cuda=use_gpu, + main_program=trainer_prog, + loss_name=avg_cost.name, + exec_strategy=exec_strategy) + + feed_var_list = [ + var for var in trainer_prog.global_block().vars.itervalues() + if var.is_data + ] + + feeder = fluid.DataFeeder(feed_var_list, place) + for pass_id in xrange(10): + for batch_id, data in enumerate(train_reader()): + avg_loss_np = train_exe.run(feed=feeder.feed(data), + fetch_list=[avg_cost.name]) + loss = np.array(avg_loss_np).mean() + if float(loss) < 5.0: + return + if math.isnan(loss): + assert ("Got Nan loss, training failed") + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py index 95af51f1b2..0f289af284 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py @@ -136,16 +136,16 @@ class BaseRNN(object): feed_dict = dict() for iname in self.inputs: - lod = [0] + lod = [] np_flatten = [] for seq_id in xrange(len(self.inputs[iname])): seq_len = len(self.inputs[iname][seq_id]) - lod.append(lod[-1] + seq_len) + lod.append(seq_len) np_flatten.extend(self.inputs[iname][seq_id]) t = fluid.Tensor() t.set(numpy.array(np_flatten), place) - t.set_lod([lod]) + t.set_recursive_sequence_lengths([lod]) feed_dict[iname] = t for pname in self.params: diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py index d3f63ee2c4..92e718662d 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_static_input.py @@ -39,20 +39,20 @@ class TestDyRnnStaticInput(unittest.TestCase): def prepare_x_tensor(self): self.x_tensor_dim = 10 - lod = [[0, 2, 3, 6]] - shape = [lod[0][-1], self.x_tensor_dim] + lod = [[2, 1, 3]] + shape = [sum(lod[0]), self.x_tensor_dim] self.x_tensor_data = np.random.random(shape).astype('float32') self.x_tensor = core.LoDTensor() - self.x_tensor.set_lod(lod) + self.x_tensor.set_recursive_sequence_lengths(lod) self.x_tensor.set(self.x_tensor_data, self.place) def prepare_static_input_tensor(self): self.static_input_tensor_dim = 4 - lod = [[0, 1, 3, 6]] - shape = [lod[0][-1], self.static_input_tensor_dim] + lod = [[1, 2, 3]] + shape = [sum(lod[0]), self.static_input_tensor_dim] self.static_input_data = np.random.random(shape).astype('float32') self.static_input_tensor = core.LoDTensor() - self.static_input_tensor.set_lod(lod) + self.static_input_tensor.set_recursive_sequence_lengths(lod) self.static_input_tensor.set(self.static_input_data, self.place) def fetch_value(self, var): @@ -69,7 +69,7 @@ class TestDyRnnStaticInput(unittest.TestCase): ndarray = np.zeros(shape=dims).astype('float32') for i in xrange(np.product(dims)): ndarray.ravel()[i] = lod_tensor.get_float_element(i) - return ndarray, lod_tensor.lod() + return ndarray, lod_tensor.recursive_sequence_lengths() def build_graph(self, only_forward=False): x_tensor = fluid.layers.data( @@ -131,21 +131,20 @@ class TestDyRnnStaticInput(unittest.TestCase): framework.grad_var_name('static_input_tensor')) return static_input_grad, loss - def get_seq_len_from_lod(self, lod): - return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)] - def get_expected_static_step_outs(self): - x_lod = self.x_tensor.lod() - x_seq_len = self.get_seq_len_from_lod(x_lod) + x_lod = self.x_tensor.recursive_sequence_lengths() + x_seq_len = x_lod[0] x_seq_len_sorted = sorted(x_seq_len) x_sorted_indices = np.argsort(x_seq_len)[::-1] - static_lod = self.static_input_tensor.lod() - static_sliced = [ - self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]] - for i in xrange(len(static_lod[0]) - 1) - ] - static_seq_len = self.get_seq_len_from_lod(static_lod) + static_lod = self.static_input_tensor.recursive_sequence_lengths() + static_sliced = [] + cur_offset = 0 + for i in xrange(len(static_lod[0])): + static_sliced.append(self.static_input_data[cur_offset:( + cur_offset + static_lod[0][i])]) + cur_offset += static_lod[0][i] + static_seq_len = static_lod[0] static_reordered = [] for i in xrange(len(x_sorted_indices)): static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist()) @@ -159,11 +158,13 @@ class TestDyRnnStaticInput(unittest.TestCase): for i in xrange(self._max_sequence_len): end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1) - lod = [0] + lod = [] + total_len = 0 for i in xrange(end): - lod.append(static_seq_len_reordered[i] + lod[-1]) + lod.append(static_seq_len_reordered[i]) + total_len += lod[-1] static_step_lods.append([lod]) - end = lod[-1] + end = total_len static_step_outs.append( np.array(static_reordered[:end]).astype('float32')) @@ -199,7 +200,9 @@ class TestDyRnnStaticInput(unittest.TestCase): self.static_input_tensor.set_float_element(i, origin) numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) - self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod())) + self.assertTrue( + np.allclose(actual_lod, + self.static_input_tensor.recursive_sequence_lengths())) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py index 2957fb5058..816562621b 100644 --- a/python/paddle/fluid/tests/unittests/test_edit_distance_op.py +++ b/python/paddle/fluid/tests/unittests/test_edit_distance_op.py @@ -52,23 +52,29 @@ class TestEditDistanceOp(OpTest): def setUp(self): self.op_type = "edit_distance" normalized = False - x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64") - x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64") + x1 = np.array([[12, 3, 5, 8, 2]]).astype("int64") + x2 = np.array([[12, 4, 7, 8]]).astype("int64") x1 = np.transpose(x1) x2 = np.transpose(x2) - x1_lod = [0, 1, 5] - x2_lod = [0, 3, 4] + x1_lod = [1, 4] + x2_lod = [3, 1] - num_strs = len(x1_lod) - 1 + num_strs = len(x1_lod) distance = np.zeros((num_strs, 1)).astype("float32") sequence_num = np.array(2).astype("int64") + + x1_offset = 0 + x2_offset = 0 for i in range(0, num_strs): distance[i] = Levenshtein( - hyp=x1[x1_lod[i]:x1_lod[i + 1]], - ref=x2[x2_lod[i]:x2_lod[i + 1]]) + hyp=x1[x1_offset:(x1_offset + x1_lod[i])], + ref=x2[x2_offset:(x2_offset + x2_lod[i])]) + x1_offset += x1_lod[i] + x2_offset += x2_lod[i] if normalized is True: - len_ref = x2_lod[i + 1] - x2_lod[i] + len_ref = x2_lod[i] distance[i] = distance[i] / len_ref + self.attrs = {'normalized': normalized} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.outputs = {'Out': distance, 'SequenceNum': sequence_num} @@ -81,23 +87,29 @@ class TestEditDistanceOpNormalized(OpTest): def setUp(self): self.op_type = "edit_distance" normalized = True - x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64") - x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64") + x1 = np.array([[10, 3, 6, 5, 8, 2]]).astype("int64") + x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64") x1 = np.transpose(x1) x2 = np.transpose(x2) - x1_lod = [0, 1, 3, 6] - x2_lod = [0, 2, 3, 5] + x1_lod = [1, 2, 3] + x2_lod = [2, 1, 2] - num_strs = len(x1_lod) - 1 + num_strs = len(x1_lod) distance = np.zeros((num_strs, 1)).astype("float32") sequence_num = np.array(3).astype("int64") + + x1_offset = 0 + x2_offset = 0 for i in range(0, num_strs): distance[i] = Levenshtein( - hyp=x1[x1_lod[i]:x1_lod[i + 1]], - ref=x2[x2_lod[i]:x2_lod[i + 1]]) + hyp=x1[x1_offset:(x1_offset + x1_lod[i])], + ref=x2[x2_offset:(x2_offset + x2_lod[i])]) + x1_offset += x1_lod[i] + x2_offset += x2_lod[i] if normalized is True: - len_ref = x2_lod[i + 1] - x2_lod[i] + len_ref = x2_lod[i] distance[i] = distance[i] / len_ref + self.attrs = {'normalized': normalized} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.outputs = {'Out': distance, 'SequenceNum': sequence_num} diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py new file mode 100644 index 0000000000..bcdbfc8e52 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_mkldnn_op.py @@ -0,0 +1,130 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import numpy as np +import paddle.fluid.core as core +from op_test import OpTest +from test_elementwise_add_op import * +''' +Some tests differ from the tests defined in test_elementwise_add_op.py +because MKLDNN does not support tensors of number of dimensions 3. +Such dimensions cause exceptions in MKLDNN reorder primitive. +''' + + +class TestMKLDNNElementwiseAddOp(TestElementwiseAddOp): + def init_input_output(self): + self.x = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) + self.y = np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype(self.dtype) + self.out = np.add(self.x, self.y) + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_scalar(TestElementwiseAddOp_scalar): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) + self.y = np.random.rand(1).astype(self.dtype) + self.out = self.x + self.y + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_scalar2(TestElementwiseAddOp_scalar2): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) + self.y = np.random.rand(1, 1).astype(self.dtype) + self.out = self.x + self.y + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_Vector(TestElementwiseAddOp_Vector): + def init_kernel_type(self): + self.use_mkldnn = True + + +class TesMKLDNNtElementwiseAddOp_broadcast_0(TestElementwiseAddOp_broadcast_0): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) + self.y = np.random.rand(2).astype(self.dtype) + self.out = self.x + self.y.reshape(2, 1, 1, 1) + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_broadcast_1(TestElementwiseAddOp_broadcast_1): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) + self.y = np.random.rand(3).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 3, 1, 1) + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_broadcast_2(TestElementwiseAddOp_broadcast_2): + def init_input_output(self): + self.x = np.random.rand(2, 2, 3, 4).astype(self.dtype) + self.y = np.random.rand(4).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 1, 1, 4) + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_broadcast_3(TestElementwiseAddOp_broadcast_3): + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_broadcast_4(TestElementwiseAddOp_broadcast_4): + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_rowwise_add_0( + TestElementwiseAddOp_rowwise_add_0): + def init_input_output(self): + self.x = np.random.rand(2, 3, 4, 5).astype(self.dtype) + self.y = np.random.rand(3, 4).astype(self.dtype) + self.out = self.x + self.y.reshape(1, 3, 4, 1) + + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_rowwise_add_1( + TestElementwiseAddOp_rowwise_add_1): + def init_kernel_type(self): + self.use_mkldnn = True + + +class TestMKLDNNElementwiseAddOp_channelwise_add( + TestElementwiseAddOp_channelwise_add): + def init_input_output(self): + self.x = np.random.rand(3, 5, 20, 20).astype(self.dtype) + self.y = np.random.rand(3, 1, 1, 1).astype(self.dtype) + self.out = self.x + self.y + + def init_kernel_type(self): + self.use_mkldnn = True + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 96d47906a0..fb9a496126 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -18,19 +18,23 @@ from op_test import OpTest class TestElementwiseAddOp(OpTest): + def init_kernel_type(self): + self.use_mkldnn = False + def setUp(self): self.op_type = "elementwise_add" self.dtype = np.float32 self.axis = -1 self.init_dtype() self.init_input_output() + self.init_kernel_type() self.init_axis() self.inputs = { 'X': OpTest.np_dtype_to_fluid_dtype(self.x), 'Y': OpTest.np_dtype_to_fluid_dtype(self.y) } - self.attrs = {'axis': self.axis} + self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} self.outputs = {'Out': self.out} def test_check_output(self): diff --git a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py index 281068e945..026ac2112b 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_dequantize_op.py @@ -40,7 +40,6 @@ class TestFakeDequantizeMaxAbsOp(OpTest): self.op_type = "fake_dequantize_max_abs" x = np.random.randn(31, 65).astype("float32") yq, scale = quantize_max_abs(x, self.num_bits) - print 'scale ', scale ydq = dequantize_max_abs(yq, self.num_bits, scale) self.inputs = {'X': yq} diff --git a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py index 9d724a6479..8b9da84311 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py +++ b/python/paddle/fluid/tests/unittests/test_feed_fetch_method.py @@ -24,17 +24,16 @@ class TestFeedFetch(unittest.TestCase): input_array = np.ones((4, 4, 6)).astype("float32") input_array[0, 0, 0] = 3 input_array[3, 3, 5] = 10 - input_tensor = core.LoDTensor([[0, 2, 4]]) + input_tensor = core.LoDTensor([[2, 2]]) input_tensor.set(input_array, place) core.set_feed_variable(scope, input_tensor, "feed", 0) output_tensor = core.get_fetch_variable(scope, "feed", 0) - output_lod = output_tensor.lod() - self.assertEqual(0, output_lod[0][0]) + output_lod = output_tensor.recursive_sequence_lengths() + self.assertEqual(2, output_lod[0][0]) self.assertEqual(2, output_lod[0][1]) - self.assertEqual(4, output_lod[0][2]) output_array = np.array(output_tensor) self.assertEqual(3, output_array[0, 0, 0]) diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py index 533d8ccfac..0c75cf33f5 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_batch_size_like_op.py @@ -55,7 +55,7 @@ class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest): self.op_type = "fill_constant_batch_size_like" self.inputs = { 'Input': (np.random.random((31, 28)).astype("float32"), - [[0, 9, 23, 31]]) + [[9, 14, 8]]) } self.attrs = { 'value': 3.5, diff --git a/paddle/contrib/tape/CMakeLists.txt b/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py similarity index 50% rename from paddle/contrib/tape/CMakeLists.txt rename to python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py index 5450359d85..3ae877a608 100644 --- a/paddle/contrib/tape/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py @@ -1,25 +1,26 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# -if(APPLE) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move") -endif(APPLE) +import unittest + +from test_gaussian_random_op import TestGaussianRandomOp + + +class TestMKLDNN(TestGaussianRandomOp): + def init_kernel_type(self): + self.use_mkldnn = True -cc_library(tape_variable SRCS variable.cc DEPS ${FLUID_CORE_MODULES} device_context framework_proto proto_desc operator) -cc_library(tape SRCS tape.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OP_LIB} tape_variable) -cc_test(test_tape - SRCS test_tape.cc - DEPS tape tape_variable) +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index 272caceaf3..8481500fd7 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -25,7 +25,15 @@ class TestGaussianRandomOp(unittest.TestCase): def setUp(self): self.op_type = "gaussian_random" self.inputs = {} - self.attrs = {"shape": [1000, 784], "mean": .0, "std": 1., "seed": 10} + self.use_mkldnn = False + self.init_kernel_type() + self.attrs = { + "shape": [1000, 784], + "mean": .0, + "std": 1., + "seed": 10, + "use_mkldnn": self.use_mkldnn + } self.outputs = ["Out"] @@ -58,6 +66,9 @@ class TestGaussianRandomOp(unittest.TestCase): self.assertAlmostEqual(numpy.mean(tensor), .0, delta=0.1) self.assertAlmostEqual(numpy.std(tensor), 1., delta=0.1) + def init_kernel_type(self): + pass + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_gru_op.py b/python/paddle/fluid/tests/unittests/test_gru_op.py index 3a13eb872a..8fbf156085 100644 --- a/python/paddle/fluid/tests/unittests/test_gru_op.py +++ b/python/paddle/fluid/tests/unittests/test_gru_op.py @@ -20,8 +20,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu class TestGRUOp(OpTest): - lod = [[0, 2, 6, 9]] - batch_size = lod[0][-1] + lod = [[2, 4, 3]] + batch_size = sum(lod[0]) frame_size = 5 activate = { 'identity': identity, @@ -33,10 +33,10 @@ class TestGRUOp(OpTest): @staticmethod def seq_to_batch(lod, is_reverse): idx_in_seq_list = [] - seq_starts = lod[0] - seq_lens = [] - for i in range(len(seq_starts) - 1): - seq_lens.append(seq_starts[i + 1] - seq_starts[i]) + seq_lens = lod[0] + seq_starts = [0] + for i in range(len(seq_lens)): + seq_starts.append(seq_starts[-1] + seq_lens[i]) sorted_seqs = sorted( range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x]) num_batch = seq_lens[sorted_seqs[0]] diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 587e2025e1..15a72cb605 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -364,5 +364,22 @@ class TestMSRAInitializer(unittest.TestCase): self.assertEqual(init_op.attr('seed'), 134) +class TestMSRAInitializer(unittest.TestCase): + def test_bilinear_initializer(self): + """Test the bilinear initializer with supplied arguments + """ + program = framework.Program() + block = program.global_block() + block.create_parameter( + dtype="float32", + shape=[8, 1, 3, 3], + lod_level=0, + name="param", + initializer=initializer.BilinearInitializer()) + self.assertEqual(len(block.ops), 1) + init_op = block.ops[0] + self.assertEqual(init_op.type, 'assign_value') + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py index 8f62ac20a5..eff4212d91 100644 --- a/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py +++ b/python/paddle/fluid/tests/unittests/test_iou_similarity_op.py @@ -58,8 +58,8 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp): def setUp(self): super(TestIOUSimilarityOpWithLoD, self).setUp() - self.boxes1_lod = [[0, 1, 2]] - self.output_lod = [[0, 1, 2]] + self.boxes1_lod = [[1, 1]] + self.output_lod = [[1, 1]] self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.outputs = {'Out': (self.output, self.output_lod)} diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index f5b305a025..becf73e3b5 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -411,6 +411,33 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) + def test_crop(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[3, 5], dtype="float32") + y = layers.data(name='y', shape=[2, 3], dtype="float32") + output = layers.crop(x, shape=y) + self.assertIsNotNone(output) + print(str(program)) + + def test_mean_iou(self): + program = Program() + with program_guard(program): + x = layers.data(name='x', shape=[16], dtype='float32') + y = layers.data(name='label', shape=[1], dtype='int64') + iou = layers.mean_iou(x, y, 2) + self.assertIsNotNone(iou) + print(str(program)) + + def test_argsort(self): + program = Program() + with program_guard(program): + data = layers.data(name='x', shape=[2, 3, 3], dtype="float32") + out, ids = layers.argsort(input=data, axis=1) + self.assertIsNotNone(out) + self.assertIsNotNone(ids) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py index f49f7635f7..696d0ab4fa 100644 --- a/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py +++ b/python/paddle/fluid/tests/unittests/test_linear_chain_crf_op.py @@ -105,11 +105,13 @@ class TestLinearChainCrfOp(OpTest): MAX_SEQ_LEN = 5 # the linear_chain_crf operator only supports sequence (LoD level = 1) - lod = [[0]] + lod = [[]] + seq_start_pos = [0] for i in range(SEQ_NUM): - lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN)) - emission = np.random.uniform(-1, 1, - [lod[-1][-1], TAG_NUM]).astype("float64") + lod[-1].append(random.randint(1, MAX_SEQ_LEN)) + seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1]) + emission = np.random.uniform( + -1, 1, [seq_start_pos[-1], TAG_NUM]).astype("float64") emission_row_max = np.amax(emission, axis=1, keepdims=True) emission_exps = np.exp(emission - emission_row_max) @@ -118,14 +120,14 @@ class TestLinearChainCrfOp(OpTest): transition_exps = np.exp(transition) labels = np.random.randint( - low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") + low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64") self.inputs = { "Emission": (emission, lod), "Transition": transition, "Label": (labels, lod) } - crf = LinearChainCrfForward(lod[0], emission, emission_row_max, + crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max, emission_exps, transition, transition_exps, labels) alpha, log_likelihood = crf.crf_forward_compute() diff --git a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py index d1d709551c..1cdc695010 100644 --- a/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py +++ b/python/paddle/fluid/tests/unittests/test_listen_and_serv_op.py @@ -57,17 +57,18 @@ class TestListenAndServOp(OpTest): def setUp(self): self.ps_timeout = 5 self.ip = "127.0.0.1" - self.port = "6173" + self.port = "0" self.trainers = 1 - self.trainer_id = 1 + self.trainer_id = 0 def _start_pserver(self, use_cuda, sync_mode): p = Process( target=run_pserver, args=(use_cuda, sync_mode, self.ip, self.port, self.trainers, self.trainer_id)) + p.daemon = True p.start() - return p.pid + return p def _wait_ps_ready(self, pid): start_left_time = self.ps_timeout @@ -89,18 +90,20 @@ class TestListenAndServOp(OpTest): def test_handle_signal_in_serv_op(self): # run pserver on CPU in sync mode - pid = self._start_pserver(False, True) - self._wait_ps_ready(pid) + p1 = self._start_pserver(False, True) + self._wait_ps_ready(p1.pid) # raise SIGTERM to pserver - os.kill(pid, signal.SIGTERM) + os.kill(p1.pid, signal.SIGINT) + p1.join() # run pserver on CPU in async mode - pid = self._start_pserver(False, False) - self._wait_ps_ready(pid) + p2 = self._start_pserver(False, False) + self._wait_ps_ready(p2.pid) # raise SIGTERM to pserver - os.kill(pid, signal.SIGTERM) + os.kill(p2.pid, signal.SIGTERM) + p2.join() if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py index 093eecb837..bac5e50231 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_rank_table.py +++ b/python/paddle/fluid/tests/unittests/test_lod_rank_table.py @@ -30,7 +30,8 @@ class TestLoDRankTable(unittest.TestCase): tensor = core.LoDTensor() tensor.set(numpy.random.random(size=(17, 100)), cpu) - tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) + tensor.set_recursive_sequence_lengths( + [[1, 2], [5, 1, 1], [3, 1, 5, 1, 3, 3, 1]]) exe.run(scope=scope, feed={'x': tensor}) var = scope.find_var(rank_table.name) table = var.get_lod_rank_table() diff --git a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py index 6b6d4c824a..77905c4b96 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py +++ b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py @@ -21,11 +21,15 @@ class TestLodResetOpByAttr(OpTest): def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float32") - lod = [[0, 3, 5, 10]] - target_lod_0 = [0, 7, 10] + lod = [[3, 2, 5]] + # target_offset_lod and target_lod are the same lod info represented + # in offset-based format and length-based format, respectively. + target_offset_lod = [0, 7, 10] + target_lod = [7, 3] self.inputs = {'X': (x, lod)} - self.attrs = {'target_lod': target_lod_0} - self.outputs = {'Out': (x, [target_lod_0])} + # The `target_lod` attribute is still based on offset + self.attrs = {'target_lod': target_offset_lod} + self.outputs = {'Out': (x, [target_lod])} def test_check_output(self): self.check_output() @@ -38,13 +42,16 @@ class TestLodResetOpByInput(OpTest): def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float32") - lod = [[0, 3, 5, 10]] - target_lod_0 = [0, 4, 7, 10] + lod = [[3, 2, 5]] + # target_offset_lod and target_lod are the same lod info represented + # in offset-based format and length-based format, respectively. + target_offset_lod = [0, 4, 7, 10] + target_lod = [4, 3, 3] self.inputs = { 'X': (x, lod), - 'Y': np.array([target_lod_0]).astype('int32') + 'Y': np.array([target_offset_lod]).astype('int32') } - self.outputs = {'Out': (x, [target_lod_0])} + self.outputs = {'Out': (x, [target_lod])} def test_check_output(self): self.check_output() @@ -57,15 +64,16 @@ class TestLodResetOpBoth(OpTest): def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float32") - lod = [[0, 3, 5, 10]] - target_lod_0_attr = [0, 7, 10] - target_lod_0_in = [0, 4, 7, 10] + lod = [[3, 2, 5]] + target_offset_lod_attr = [0, 7, 10] + target_offset_lod_in = [0, 4, 7, 10] + target_lod_in = [4, 3, 3] self.inputs = { 'X': (x, lod), - 'Y': np.array(target_lod_0_in).astype('int32') + 'Y': np.array(target_offset_lod_in).astype('int32') } - self.attrs = {'target_lod': target_lod_0_attr} - self.outputs = {'Out': (x, [target_lod_0_in])} + self.attrs = {'target_lod': target_offset_lod_attr} + self.outputs = {'Out': (x, [target_lod_in])} def test_check_output(self): self.check_output() @@ -78,11 +86,11 @@ class TestLodResetOpYIsLoDTensor(OpTest): def setUp(self): self.op_type = "lod_reset" x = np.random.random((10, 20)).astype("float32") - lod = [[0, 3, 5, 10]] + lod = [[3, 2, 5]] y = np.random.random((10, 10)).astype("float32") - target_lod_0 = [[0, 4, 7, 10]] - self.inputs = {'X': (x, lod), 'Y': (y, target_lod_0)} - self.outputs = {'Out': (x, target_lod_0)} + target_lod = [[4, 3, 3]] + self.inputs = {'X': (x, lod), 'Y': (y, target_lod)} + self.outputs = {'Out': (x, target_lod)} def test_check_output(self): self.check_output() diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py index 63b17a5ccd..118c22fbb1 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py +++ b/python/paddle/fluid/tests/unittests/test_lod_tensor_array.py @@ -27,7 +27,7 @@ class TestLoDTensorArray(unittest.TestCase): for i in xrange(10): t = core.LoDTensor() t.set(numpy.array([i], dtype='float32'), cpu) - t.set_lod([[0, 1]]) + t.set_recursive_sequence_lengths([[1]]) tensor_array.append(t) self.assertEqual(10, len(tensor_array)) @@ -35,17 +35,17 @@ class TestLoDTensorArray(unittest.TestCase): for i in xrange(10): t = tensor_array[i] self.assertEqual(numpy.array(t), numpy.array([i], dtype='float32')) - self.assertEqual([[0, 1]], t.lod()) + self.assertEqual([[1]], t.recursive_sequence_lengths()) t = core.LoDTensor() t.set(numpy.array([i + 10], dtype='float32'), cpu) - t.set_lod([[0, 2]]) + t.set_recursive_sequence_lengths([[1]]) tensor_array[i] = t t = tensor_array[i] self.assertEqual( numpy.array(t), numpy.array( [i + 10], dtype='float32')) - self.assertEqual([[0, 2]], t.lod()) + self.assertEqual([[1]], t.recursive_sequence_lengths()) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py b/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py index 66a03640c1..cebe6997bb 100644 --- a/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py +++ b/python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py @@ -29,7 +29,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor = core.LoDTensor() tensor.set( numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 3, 9, 10]]) + tensor.set_recursive_sequence_lengths([[3, 6, 1]]) expect = map(lambda x: numpy.array(x).astype('int32'), [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) self.main( @@ -42,7 +42,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor = core.LoDTensor() tensor.set( numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 3, 9, 9, 10]]) + tensor.set_recursive_sequence_lengths([[3, 6, 0, 1]]) expect = map(lambda x: numpy.array(x).astype('int32'), [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) self.main( @@ -55,7 +55,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor = core.LoDTensor() tensor.set( numpy.arange(20).reshape(20, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]]) + tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]]) expect = [ numpy.array( @@ -65,7 +65,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): [17, 18, 19], dtype='int32') ] - lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]] + lod = [[[2, 3]], [[6, 6]], [[3]]] self.main( tensor=tensor, expect_array=expect, @@ -77,8 +77,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor.set( numpy.arange(31).reshape(31, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 3, 5, 9, 11], - [0, 3, 7, 11, 11, 12, 17, 19, 21, 23, 30, 31]]) + tensor.set_recursive_sequence_lengths( + [[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]]) expect = [ numpy.array( @@ -88,7 +88,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]] ] - lod = [[[0, 5, 8, 8, 15]], [[0, 2, 6, 7, 8]], [[0, 2, 6]], [[0, 2]]] + lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]] self.main( tensor=tensor, expect_array=expect, @@ -99,8 +99,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor = core.LoDTensor() tensor.set( numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], - [0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) + tensor.set_recursive_sequence_lengths( + [[2, 3, 1], [2, 3, 1, 4, 2, 1], + [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]]) expect = [ numpy.array( @@ -108,8 +109,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], range( 22, 39) + range(7, 21), range(39, 46)] ] - lod = [[[0, 1, 3, 4], [0, 1, 4, 8, 12]], - [[0, 4, 7], [0, 1, 5, 9, 17, 21, 27, 31]], [[0, 2], [0, 6, 7]]] + lod = [[[1, 2, 1], [1, 3, 4, 4]], [[4, 3], [1, 4, 4, 8, 4, 6, 4]], + [[2], [6, 1]]] self.main( tensor=tensor, expect_array=expect, @@ -120,8 +121,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): tensor = core.LoDTensor() tensor.set( numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], - [0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) + tensor.set_recursive_sequence_lengths( + [[2, 3, 1], [2, 3, 1, 4, 2, 1], + [3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]]) self.main( tensor=tensor, expect_array=None, @@ -162,12 +164,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): exp_tensor, exp_lod = exp exp_tensor = numpy.expand_dims(exp_tensor, axis=1) self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i]))) - self.assertEqual(exp_lod, array[i].lod()) + self.assertEqual(exp_lod, array[i].recursive_sequence_lengths()) def check_tensor_same(self, actual, expect): self.assertTrue( numpy.allclose(numpy.array(actual), numpy.array(expect))) - self.assertEqual(actual.lod(), expect.lod()) + self.assertEqual(actual.recursive_sequence_lengths(), + expect.recursive_sequence_lengths()) class TestCPULoDTensorArrayOpGrad(unittest.TestCase): @@ -188,7 +191,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): tensor = core.LoDTensor() tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) - tensor.set_lod([[0, 3, 9, 10]]) + tensor.set_recursive_sequence_lengths([[3, 6, 1]]) g_vars = program.global_block().var(x.name + "@GRAD") diff --git a/python/paddle/fluid/tests/unittests/test_lstm_op.py b/python/paddle/fluid/tests/unittests/test_lstm_op.py index e726f99d49..705a24bd8f 100644 --- a/python/paddle/fluid/tests/unittests/test_lstm_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstm_op.py @@ -84,15 +84,17 @@ def lstm( h = g_o * act_cell(c) return h, c - def _reverse(x, lod): + def _reverse(x, offset): y = np.zeros_like(x) - for i in range(len(lod) - 1): - b, e = lod[i], lod[i + 1] + for i in range(len(offset) - 1): + b, e = offset[i], offset[i + 1] y[b:e, :] = np.flip(x[b:e, :], 0) return y - offset = lod[0] - batch_size = len(offset) - 1 + offset = [0] + for l in lod[0]: + offset.append(offset[-1] + l) + batch_size = len(lod[0]) hidden = [] cell = [] input = _reverse(input, offset) if is_reverse else input @@ -100,7 +102,7 @@ def lstm( input = input + np.tile(w_b, (offset[-1], 1)) for i in range(batch_size): # compute one sequence - seq_len = offset[i + 1] - offset[i] + seq_len = lod[0][i] x = input[offset[i]:offset[i + 1], :] h_pre = h0[i] # 1 x D c_pre = c0[i] # 1 x D @@ -124,7 +126,7 @@ def lstm( class TestLstmOp(OpTest): def set_argument(self): - self.lod = [[0, 2, 5, 7]] + self.lod = [[2, 3, 2]] self.D = 16 self.act_gate = 'sigmoid' @@ -139,8 +141,8 @@ class TestLstmOp(OpTest): self.set_argument() self.op_type = 'lstm' - T = self.lod[0][-1] - N = len(self.lod[0]) - 1 + T = sum(self.lod[0]) + N = len(self.lod[0]) x = np.random.normal(size=(T, 4 * self.D)).astype('float64') if self.has_initial_state: @@ -186,7 +188,7 @@ class TestLstmOp(OpTest): def test_check_grad(self): # TODO(qingqing) remove folowing lines after the check_grad is refined. - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchCellPreAct'] = np.zeros( (N, self.D)).astype('float64') @@ -196,7 +198,7 @@ class TestLstmOp(OpTest): # class TestLstmOpHasInitial(TestLstmOp): # def set_argument(self): -# self.lod = [[0, 2, 5, 7]] +# self.lod = [[2, 3, 2]] # self.D = 16 # self.act_gate = 'sigmoid' @@ -209,7 +211,7 @@ class TestLstmOp(OpTest): # def test_check_grad(self): # # TODO(qingqing) remove folowing lines after the check_grad is refined. -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -218,7 +220,7 @@ class TestLstmOp(OpTest): # max_relative_error=5e-4) # def test_check_grad_ingore_bias(self): -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -228,7 +230,7 @@ class TestLstmOp(OpTest): # no_grad_set=set('Bias')) # def test_check_grad_ingore_weight(self): -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -238,7 +240,7 @@ class TestLstmOp(OpTest): # no_grad_set=set('Weight')) # def test_check_grad_ingore_input(self): -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -248,7 +250,7 @@ class TestLstmOp(OpTest): # no_grad_set=set('Input')) # def test_check_grad_ingore_h0(self): -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -258,7 +260,7 @@ class TestLstmOp(OpTest): # no_grad_set=set('H0')) # def test_check_grad_ingore_c0(self): -# N = len(self.lod[0]) - 1 +# N = len(self.lod[0]) # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchCellPreAct'] = np.zeros( # (N, self.D)).astype('float64') @@ -269,7 +271,7 @@ class TestLstmOp(OpTest): # class TestLstmOpRerverse(TestLstmOp): # def set_argument(self): -# self.lod = [[0, 2, 5, 7]] +# self.lod = [[2, 3, 2]] # self.D = 16 # self.act_gate = 'sigmoid' @@ -282,7 +284,7 @@ class TestLstmOp(OpTest): # class TestLstmOpNotUsePeepholes(TestLstmOp): # def set_argument(self): -# self.lod = [[0, 2, 5, 7]] +# self.lod = [[2, 3, 2]] # self.D = 16 # self.act_gate = 'sigmoid' diff --git a/python/paddle/fluid/tests/unittests/test_lstmp_op.py b/python/paddle/fluid/tests/unittests/test_lstmp_op.py index afff133f6c..ed2262da4b 100644 --- a/python/paddle/fluid/tests/unittests/test_lstmp_op.py +++ b/python/paddle/fluid/tests/unittests/test_lstmp_op.py @@ -64,15 +64,17 @@ def lstmp( r = act_proj(r) return r, c - def _reverse(x, lod): + def _reverse(x, offset): y = np.zeros_like(x) - for i in range(len(lod) - 1): - b, e = lod[i], lod[i + 1] + for i in range(len(offset) - 1): + b, e = offset[i], offset[i + 1] y[b:e, :] = np.flip(x[b:e, :], 0) return y - offset = lod[0] - batch_size = len(offset) - 1 + offset = [0] + for l in lod[0]: + offset.append(offset[-1] + l) + batch_size = len(lod[0]) # recurrent projection state projection = [] cell = [] @@ -81,7 +83,7 @@ def lstmp( input = input + np.tile(w_b, (offset[-1], 1)) for i in range(batch_size): # compute one sequence - seq_len = offset[i + 1] - offset[i] + seq_len = lod[0][i] x = input[offset[i]:offset[i + 1], :] r_pre = np.dot(h0[i], w_rh) # 1 x P r_pre = act_proj(r_pre) @@ -117,8 +119,8 @@ class TestLstmpOp(LstmTest.TestLstmOp): self.reset_argument() self.op_type = 'lstmp' - T = self.lod[0][-1] - N = len(self.lod[0]) - 1 + T = sum(self.lod[0]) + N = len(self.lod[0]) x = np.random.normal(size=(T, 4 * self.D)).astype('float64') if self.has_initial_state: @@ -166,7 +168,7 @@ class TestLstmpOp(LstmTest.TestLstmOp): def test_check_grad(self): # TODO(qingqing) remove folowing lines after the check_grad is refined. - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -183,7 +185,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): def test_check_grad(self): # TODO(qingqing) remove folowing lines after the check_grad is refined. - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -195,7 +197,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): max_relative_error=1e-2) def test_check_grad_ingore_bias(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -207,7 +209,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): no_grad_set=set('Bias')) def test_check_grad_ingore_weight(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -219,7 +221,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): no_grad_set=set('Weight')) def test_check_grad_ingore_proj_weight(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -231,7 +233,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): no_grad_set=set('ProjWeight')) def test_check_grad_ingore_input(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -243,7 +245,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): no_grad_set=set('Input')) def test_check_grad_ingore_h0(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') @@ -255,7 +257,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): no_grad_set=set('H0')) def test_check_grad_ingore_c0(self): - N = len(self.lod[0]) - 1 + N = len(self.lod[0]) self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py index c27573c3d6..54ee85c1a7 100644 --- a/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py +++ b/python/paddle/fluid/tests/unittests/test_mine_hard_examples_op.py @@ -70,7 +70,7 @@ class TestMineHardExamplesOp(OpTest): self.updated_match_indices = self.match_indices - self.neg_indices_lod = [[0, 1, 2]] + self.neg_indices_lod = [[1, 1]] self.neg_indices = np.array([[1], [0]]).astype('int32') @@ -92,7 +92,7 @@ class TestMineHardExamplesOpHardExample(TestMineHardExamplesOp): self.updated_match_indices = np.array([[0, -1, -1], [-1, -1, -1]]).astype('int32') - self.neg_indices_lod = [[0, 1, 3]] + self.neg_indices_lod = [[1, 2]] self.neg_indices = np.array([[2], [0], [2]]).astype('int32') diff --git a/python/paddle/fluid/tests/unittests/test_multi_file_reader.py b/python/paddle/fluid/tests/unittests/test_multi_file_reader.py index 3f940203b9..dbd510e64f 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_file_reader.py +++ b/python/paddle/fluid/tests/unittests/test_multi_file_reader.py @@ -64,8 +64,7 @@ class TestMultipleReader(unittest.TestCase): while True: try: img_val, = exe.run(fetch_list=[img]) - except fluid.core.EnforceNotMet as ex: - self.assertIn("There is no next data.", ex.message) + except fluid.core.EOFException: break batch_count += 1 self.assertLessEqual(img_val.shape[0], self.batch_size) diff --git a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py index 52e7cc1ffb..7fc9f55044 100644 --- a/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py +++ b/python/paddle/fluid/tests/unittests/test_multi_pass_reader.py @@ -59,8 +59,7 @@ class TestMultipleReader(unittest.TestCase): while True: try: img_val, = exe.run(fetch_list=[img]) - except fluid.core.EnforceNotMet as ex: - self.assertIn("There is no next data.", ex.message) + except fluid.core.EOFException: break batch_count += 1 self.assertLessEqual(img_val.shape[0], self.batch_size) diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 6459913c01..aacd8ae45a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -135,12 +135,12 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, batch_size = scores.shape[0] det_outs = [] - lod = [0] + lod = [] for n in range(batch_size): nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background, score_threshold, nms_threshold, nms_top_k, keep_top_k) - lod.append(lod[-1] + nmsed_num) + lod.append(nmsed_num) if nmsed_num == 0: continue for c, indices in nmsed_outs.iteritems(): diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_op.py index cd78cce872..d13f2b3afd 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_op.py @@ -27,9 +27,9 @@ class TestOneHotOp(OpTest): self.op_type = 'one_hot' depth = 10 dimension = 12 - x_lod = [[0, 4, 5, 8, 11]] - x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] - x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))] + x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1]) out = np.zeros(shape=(np.product(x.shape[:-1]), depth)).astype('float32') @@ -50,9 +50,9 @@ class TestOneHotOp_default_dtype(OpTest): self.op_type = 'one_hot' depth = 10 dimension = 12 - x_lod = [[0, 4, 5, 8, 11]] - x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] - x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) + x_lod = [[4, 1, 3, 3]] + x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))] + x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1]) out = np.zeros(shape=(np.product(x.shape[:-1]), depth)).astype('float32') @@ -75,11 +75,11 @@ class TestOneHotOp_exception(OpTest): self.place = core.CPUPlace() self.dimension = 12 self.x = core.LoDTensor() - x_lod = [[0, 4, 5, 8, 11]] - data = [np.random.randint(11, 20) for i in xrange(x_lod[0][-1])] - data = np.array(data).astype('int').reshape([x_lod[0][-1], 1]) + x_lod = [[4, 1, 3, 3]] + data = [np.random.randint(11, 20) for i in xrange(sum(x_lod[0]))] + data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1]) self.x.set(data, self.place) - self.x.set_lod(x_lod) + self.x.set_recursive_sequence_lengths(x_lod) def test_check_output(self): program = Program() diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index e775db1d10..7286c7c450 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -434,5 +434,71 @@ class TestDecayedAdagradOptimizer(unittest.TestCase): self.assertAlmostEqual(init_ops[1].attr('value'), 0.0) +class TestFtrlOptimizer(unittest.TestCase): + class MockFtrl(optimizer.FtrlOptimizer): + def get_accumulators(self): + return self._accumulators + + def get_squared_str(self): + return self._squared_acc_str + + def get_linear_str(self): + return self._linear_acc_str + + def test_ftrl_optimizer(self): + init_program = framework.Program() + program = framework.Program() + block = program.global_block() + mul_x = block.create_parameter( + dtype="float32", + shape=[5, 10], + lod_level=0, + name="mul.x", + optimize_attr={'learning_rate': 1.1}) + mul_y = block.create_var( + dtype="float32", shape=[10, 8], lod_level=0, name="mul.y") + mul_out = block.create_var( + dtype="float32", shape=[5, 8], lod_level=0, name="mul.out") + block.append_op( + type="mul", + inputs={"X": mul_x, + "Y": mul_y}, + outputs={"Out": mul_out}, + attrs={"x_num_col_dims": 1}) + mean_out = block.create_var( + dtype="float32", shape=[1], lod_level=0, name="mean.out") + block.append_op( + type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) + learning_rate = 0.01 + ftrl_optimizer = self.MockFtrl( + learning_rate=learning_rate, l1=0.0, l2=0.0, lr_power=-0.5) + params_grads = append_backward(mean_out) + self.assertEqual(len(params_grads), 1) + self.assertEqual(len(ftrl_optimizer.get_accumulators()), 0) + opts = ftrl_optimizer.create_optimization_pass(params_grads, mul_out, + init_program) + self.assertEqual(len(opts), 3) + self.assertEqual([op.type for op in opts], + ["fill_constant", "elementwise_mul", "ftrl"]) + + # Check accumulators + accumulators = ftrl_optimizer.get_accumulators() + self.assertEqual(len(accumulators), 2) + self.assertTrue(ftrl_optimizer.get_squared_str() in accumulators) + self.assertTrue(ftrl_optimizer.get_linear_str() in accumulators) + squared_acc = accumulators[ftrl_optimizer.get_squared_str()] + linear_acc = accumulators[ftrl_optimizer.get_linear_str()] + self.assertEqual(len(squared_acc), 1) + self.assertEqual(len(linear_acc), 1) + self.assertTrue(mul_x.name in squared_acc) + self.assertTrue(mul_x.name in linear_acc) + + # Check init_program + init_ops = init_program.global_block().ops + self.assertEqual(len(init_ops), 3) + self.assertEqual(init_ops[0].type, "fill_constant") + self.assertAlmostEqual(init_ops[0].attr('value'), learning_rate) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py index 163975555e..63fb58c692 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py @@ -169,10 +169,10 @@ class TestCRFModel(unittest.TestCase): data = train_data() for i in xrange(10): cur_batch = next(data) - print map(np.array, - pe.run(feed=feeder.feed(cur_batch), - fetch_list=[avg_cost.name]))[0] + print pe.run(feed=feeder.feed(cur_batch), + fetch_list=[avg_cost.name])[0] + @unittest.skip(reason="CI hangs") def test_update_sparse_parameter_all_reduce(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce @@ -181,6 +181,7 @@ class TestCRFModel(unittest.TestCase): self.check_network_convergence( is_sparse=True, build_strategy=build_strategy, use_cuda=False) + @unittest.skip(reason="CI hangs") def test_update_dense_parameter_all_reduce(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce @@ -189,6 +190,7 @@ class TestCRFModel(unittest.TestCase): self.check_network_convergence( is_sparse=False, build_strategy=build_strategy, use_cuda=False) + @unittest.skip(reason="CI hangs") def test_update_sparse_parameter_reduce(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce @@ -197,6 +199,7 @@ class TestCRFModel(unittest.TestCase): self.check_network_convergence( is_sparse=True, build_strategy=build_strategy, use_cuda=False) + @unittest.skip(reason="CI hangs") def test_update_dense_parameter_reduce(self): build_strategy = fluid.BuildStrategy() build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py index 79702475cc..1f5d2f1677 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py @@ -75,7 +75,9 @@ class TestFetchOp(unittest.TestCase): fetch_list.append(k) for data in train_inputs: - ret = pe.run(fetch_list, feed=feeder.feed(data)) + ret = pe.run(fetch_list, + feed=feeder.feed(data), + return_numpy=True) for i in range(len(fetch_list)): assert not math.isnan(np.sum(ret[i])) and \ not math.isinf(np.sum(ret[i])) @@ -128,7 +130,7 @@ class TestFeedParallel(unittest.TestCase): use_cuda=use_cuda, loss_name=loss.name, main_program=main) for batch_id, data in enumerate(reader()): - loss_np = np.array(pe.run(feed=data, fetch_list=[loss.name])[0]) + loss_np = pe.run(feed=data, fetch_list=[loss.name])[0] print batch_id, loss_np if batch_id == 2: break diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py index 31ba8c1d60..9a2733927d 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_test_while_train.py @@ -16,6 +16,8 @@ import paddle.fluid as fluid import numpy as np import unittest import os +import sys +import math def simple_fc_net(): @@ -70,10 +72,17 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase): for i in xrange(5): test_loss, = test_exe.run([loss.name], feed=feed_dict) - test_loss = np.array(test_loss) train_loss, = train_exe.run([loss.name], feed=feed_dict) - train_loss = np.array(train_loss) + + avg_test_loss_val = np.array(test_loss).mean() + if math.isnan(float(avg_test_loss_val)): + sys.exit("got NaN loss, testing failed.") + + avg_train_loss_val = np.array(train_loss).mean() + if math.isnan(float(avg_train_loss_val)): + sys.exit("got NaN loss, training failed.") + self.assertTrue( np.allclose( train_loss, test_loss, atol=1e-8), diff --git a/python/paddle/fluid/tests/unittests/test_parallel_op.py b/python/paddle/fluid/tests/unittests/test_parallel_op.py index 79bea148f9..9ba5f988f3 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_op.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_op.py @@ -113,7 +113,9 @@ class BaseParallelForTest(unittest.TestCase): generator = callback() # Automatically insert parallel do if use_parallel = True if use_parallel: - places = fluid.layers.get_places() + thread_num = fluid.core.get_cuda_device_count( + ) if use_gpu else 8 + places = fluid.layers.get_places(thread_num) pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl) data = next(generator) diff --git a/python/paddle/fluid/tests/unittests/test_print_op.py b/python/paddle/fluid/tests/unittests/test_print_op.py index c75080fbb9..e01af42a58 100644 --- a/python/paddle/fluid/tests/unittests/test_print_op.py +++ b/python/paddle/fluid/tests/unittests/test_print_op.py @@ -28,7 +28,7 @@ class TestPrintOpCPU(unittest.TestCase): self.x_tensor = core.LoDTensor() tensor_np = np.random.random(size=(2, 3)).astype('float32') self.x_tensor.set(tensor_np, self.place) - self.x_tensor.set_lod([[0, 1, 1]]) + self.x_tensor.set_recursive_sequence_lengths([[1, 1]]) def build_network(self, only_forward, **kargs): x = layers.data('x', shape=[3], dtype='float32', lod_level=1) @@ -62,7 +62,7 @@ class TestPrintOpGPU(TestPrintOpCPU): self.x_tensor = core.LoDTensor() tensor_np = np.random.random(size=(2, 3)).astype('float32') self.x_tensor.set(tensor_np, self.place) - self.x_tensor.set_lod([[0, 1, 1]]) + self.x_tensor.set_recursive_sequence_lengths([[1, 1]]) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_recordio_reader.py b/python/paddle/fluid/tests/unittests/test_recordio_reader.py index f32050014d..69a522e273 100644 --- a/python/paddle/fluid/tests/unittests/test_recordio_reader.py +++ b/python/paddle/fluid/tests/unittests/test_recordio_reader.py @@ -68,8 +68,7 @@ class TestRecordIO(unittest.TestCase): while True: try: tmp, = exe.run(fetch_list=[avg_loss]) - except fluid.core.EnforceNotMet as ex: - self.assertIn("There is no next data.", ex.message) + except fluid.core.EOFException: break avg_loss_np.append(tmp) diff --git a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py index 76d0d2f2fe..a70321bd80 100644 --- a/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py @@ -70,11 +70,10 @@ class TestReorderLoDTensor(unittest.TestCase): lod_level_i = numpy.random.randint( low=1, high=5, - size=self.num_seq if i == 0 else lod_level_i[-1]) - lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist() + size=self.num_seq if i == 0 else sum(lod_level_i)).tolist() data_lod.append(lod_level_i) data_value = numpy.random.random( - size=[data_lod[-1][-1] if data_lod else self.num_seq + size=[sum(data_lod[-1]) if data_lod else self.num_seq ] + data_shape).astype('float32') self.data[data_name] = (data_value, data_lod) @@ -84,29 +83,36 @@ class TestReorderLoDTensor(unittest.TestCase): tensor = fluid.Tensor() tensor.set(self.data[desc[0]][0], place) if self.data[desc[0]][1]: - tensor.set_lod(self.data[desc[0]][1]) + tensor.set_recursive_sequence_lengths(self.data[desc[0]][1]) self.inputs[desc[0]] = tensor def reorder(self): - level = 0 + def convert_to_offset(lod): + offset_lod = [[0] for i in lod] + for i, level in enumerate(lod): + for seq_len in level: + offset_lod[i].append(offset_lod[i][-1] + seq_len) + return offset_lod + level = 0 # compute the rank_table according to ref_lod ref_lod = self.data[self.data_desc[1][0]][1][level] rank_table = [] # list of (index, length) - for i in range(len(ref_lod) - 1): - rank_table.append((i, ref_lod[i + 1] - ref_lod[i])) + for i in range(len(ref_lod)): + rank_table.append((i, ref_lod[i])) rank_table = sorted(rank_table, lambda x, y: y[1] - x[1]) # compute the input sequence info according to input_lod input_value, input_lod = self.data[self.data_desc[0][0]] + offset_lod = convert_to_offset(input_lod) input_table = [] # list of (offset, length, sub_lod) - if input_lod: - for i in range(len(input_lod[level]) - 1): + if offset_lod: + for i in range(len(offset_lod[level]) - 1): start_idx = i end_idx = i + 1 sub_lod = [] - for lod_level_i in input_lod[level:]: + for lod_level_i in offset_lod[level:]: sub_lod_i = [] for idx in range(start_idx, end_idx): sub_lod_i.append(lod_level_i[idx + 1] - lod_level_i[ @@ -132,10 +138,9 @@ class TestReorderLoDTensor(unittest.TestCase): input_seq_sub_lod = input_table[index][2] if len(output_lod) == 0: - output_lod = [[0] for i in input_seq_sub_lod] - for i, sub_lod_i in enumerate(input_seq_sub_lod): - for idx_sub in sub_lod_i: - output_lod[i].append(output_lod[i][-1] + idx_sub) + output_lod = [[] for i in input_seq_sub_lod] + for i, level in enumerate(input_seq_sub_lod): + output_lod[i].extend(level) return output_value, output_lod def test_reorder_lod_tensor(self): @@ -148,7 +153,8 @@ class TestReorderLoDTensor(unittest.TestCase): self.assertTrue( numpy.allclose( numpy.array(actual_output), expect_output, atol=0.001)) - self.assertEqual(expect_output_lod, actual_output.lod()) + self.assertEqual(expect_output_lod, + actual_output.recursive_sequence_lengths()) # check gradient expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0]) expect_grad_lod = self.data[self.data_desc[0][0]][1] @@ -156,7 +162,8 @@ class TestReorderLoDTensor(unittest.TestCase): self.assertTrue( numpy.allclose( numpy.array(actual_grad), expect_grad, atol=0.001)) - self.assertEqual(expect_grad_lod, actual_grad.lod()) + self.assertEqual(expect_grad_lod, + actual_grad.recursive_sequence_lengths()) def test_reorder_tensor(self): self.data_desc[0][-1] = 0 # input is tensor @@ -168,7 +175,8 @@ class TestReorderLoDTensor(unittest.TestCase): self.assertTrue( numpy.allclose( numpy.array(actual_output), expect_output, atol=0.001)) - self.assertEqual(expect_output_lod, actual_output.lod()) + self.assertEqual(expect_output_lod, + actual_output.recursive_sequence_lengths()) # check gradient expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0]) expect_grad_lod = self.data[self.data_desc[0][0]][1] @@ -176,14 +184,14 @@ class TestReorderLoDTensor(unittest.TestCase): self.assertTrue( numpy.allclose( numpy.array(actual_grad), expect_grad, atol=0.001)) - self.assertEqual(expect_grad_lod, actual_grad.lod()) + self.assertEqual(expect_grad_lod, + actual_grad.recursive_sequence_lengths()) # compare outputs between LodTensors with explicit and implicit lod # use the same data but set the input lod explicitly - input_lod = [[ - i for i in range(len(self.data[self.data_desc[0][0]][0]) + 1) - ]] - self.inputs[self.data_desc[0][0]].set_lod(input_lod) + input_lod = [[1] * len(self.data[self.data_desc[0][0]][0])] + self.inputs[self.data_desc[0][0]].set_recursive_sequence_lengths( + input_lod) # preserve the output of LodTensor with implicit lod to compare expect_output = [ numpy.array(actual_output) for actual_output in self.actual_outputs diff --git a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py index 3d754aff3a..df5684ab17 100644 --- a/python/paddle/fluid/tests/unittests/test_roi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_roi_pool_op.py @@ -107,7 +107,7 @@ class TestROIPoolOp(OpTest): rois = [] self.rois_lod = [[]] for bno in range(self.batch_size): - self.rois_lod[0].append(len(rois)) + self.rois_lod[0].append(bno + 1) for i in range(bno + 1): x1 = np.random.random_integers( 0, self.width / self.spatial_scale - self.pooled_width) @@ -121,7 +121,6 @@ class TestROIPoolOp(OpTest): roi = [bno, x1, y1, x2, y2] rois.append(roi) - self.rois_lod[0].append(len(rois)) self.rois_num = len(rois) self.rois = np.array(rois).astype("int64") diff --git a/python/paddle/fluid/tests/unittests/test_row_conv_op.py b/python/paddle/fluid/tests/unittests/test_row_conv_op.py index 30f1efbcbc..07dcd10868 100644 --- a/python/paddle/fluid/tests/unittests/test_row_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_row_conv_op.py @@ -19,8 +19,10 @@ from op_test import OpTest def row_conv_forward(x, lod, wt): out = np.zeros_like(x) - seq_info = lod[0] - num_sequences = len(seq_info) - 1 + num_sequences = len(lod[0]) + seq_info = [0] + for seq_len in lod[0]: + seq_info.append(seq_info[-1] + seq_len) context_length = wt.shape[0] for i in range(num_sequences): # loop over number of sequences @@ -32,7 +34,6 @@ def row_conv_forward(x, lod, wt): cur_timesteps = end - start for j in range(cur_timesteps): # loop over different timesteps for k in range(context_length): - if j + k >= cur_timesteps: continue curoutput[j, :] += curinput[j + k, :] * wt[k, :] @@ -44,8 +45,8 @@ class TestRowConvOp1(OpTest): def setUp(self): self.op_type = "row_conv" - lod = [[0, 2, 5, 7]] - T = lod[0][-1] + lod = [[2, 3, 2]] + T = sum(lod[0]) D = 16 context_length = 2 @@ -75,8 +76,8 @@ class TestRowConvOp2(OpTest): def setUp(self): self.op_type = "row_conv" - lod = [[0, 20, 50, 100]] - T = lod[0][-1] + lod = [[20, 30, 50]] + T = sum(lod[0]) D = 35 context_length = 35 diff --git a/python/paddle/fluid/tests/unittests/test_seq_concat_op.py b/python/paddle/fluid/tests/unittests/test_seq_concat_op.py index 10592d127f..11ffa761a6 100644 --- a/python/paddle/fluid/tests/unittests/test_seq_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_seq_concat_op.py @@ -18,14 +18,19 @@ import sys from op_test import OpTest -def to_abs_lod(lod): - if len(lod) == 0 or len(lod) == 1: - return lod +def to_abs_offset_lod(lod): + offset_lod = [[0] for i in lod] + for i, level in enumerate(lod): + for seq_len in level: + offset_lod[i].append(offset_lod[i][-1] + seq_len) + + if len(offset_lod) == 0 or len(offset_lod) == 1: + return offset_lod import copy - new_lod = copy.deepcopy(lod) - for idx, val in enumerate(lod[0]): - new_lod[0][idx] = lod[1][val] - return new_lod + new_offset_lod = copy.deepcopy(offset_lod) + for idx, val in enumerate(offset_lod[0]): + new_offset_lod[0][idx] = offset_lod[1][val] + return new_offset_lod def seq_concat(inputs, level): @@ -35,11 +40,11 @@ def seq_concat(inputs, level): x1 = inputs['X'][1][1][0] level_idx = len(lod0) - level - 1 outs = [] - for i in range(len(lod0[level_idx]) - 1): - sub_x0 = x0[to_abs_lod(lod0)[level_idx][i]:to_abs_lod(lod0)[level_idx][ - i + 1], :] - sub_x1 = x1[to_abs_lod(lod1)[level_idx][i]:to_abs_lod(lod1)[level_idx][ - i + 1], :] + for i in range(len(lod0[level_idx])): + sub_x0 = x0[to_abs_offset_lod(lod0)[level_idx][i]:to_abs_offset_lod( + lod0)[level_idx][i + 1], :] + sub_x1 = x1[to_abs_offset_lod(lod1)[level_idx][i]:to_abs_offset_lod( + lod1)[level_idx][i + 1], :] outs.append(np.concatenate((sub_x0, sub_x1), axis=0)) return np.concatenate(outs, axis=0) @@ -48,9 +53,9 @@ class TestSeqConcatOp(OpTest): def set_data(self): # two level, batch size is 3 x0 = np.random.random((4, 6, 3)).astype('float32') - lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] + lod0 = [[2, 2], [1, 1, 1, 1]] x1 = np.random.random((4, 8, 3)).astype('float32') - lod1 = [[0, 2, 4], [0, 1, 2, 3, 4]] + lod1 = [[2, 2], [1, 1, 1, 1]] axis = 1 level = 1 self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} @@ -72,14 +77,14 @@ class TestSeqConcatOpLevelZeroNestedSequence(TestSeqConcatOp): def set_data(self): # two level, batch size is 3 x0 = np.random.random((4, 6, 3)).astype('float32') - lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] + lod0 = [[2, 2], [1, 1, 1, 1]] x1 = np.random.random((7, 6, 3)).astype('float32') - lod1 = [[0, 2, 4], [0, 1, 3, 5, 7]] + lod1 = [[2, 2], [1, 2, 2, 2]] axis = 0 level = 0 self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.attrs = {'axis': axis, 'level': level} - out_lod = [[0, 2, 4], [0, 2, 5, 8, 11]] + out_lod = [[2, 2], [2, 3, 3, 3]] self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} @@ -87,14 +92,14 @@ class TestSeqConcatOplevelOneNestedSequence(TestSeqConcatOp): def set_data(self): # two level, batch size is 3 x0 = np.random.random((4, 6, 3)).astype('float32') - lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] + lod0 = [[2, 2], [1, 1, 1, 1]] x1 = np.random.random((7, 6, 3)).astype('float32') - lod1 = [[0, 3, 4], [0, 1, 3, 5, 7]] + lod1 = [[3, 1], [1, 2, 2, 2]] axis = 0 level = 1 self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.attrs = {'axis': axis, 'level': level} - out_lod = [[0, 5, 8], [0, 1, 2, 3, 5, 7, 8, 9, 11]] + out_lod = [[5, 3], [1, 1, 1, 2, 2, 1, 1, 2]] self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} @@ -102,14 +107,14 @@ class TestSeqConcatOpLevelZeroSequence(TestSeqConcatOp): def set_data(self): # two level, batch size is 3 x0 = np.random.random((4, 3, 4)).astype('float32') - lod0 = [[0, 1, 2, 3, 4]] + lod0 = [[1, 1, 1, 1]] x1 = np.random.random((7, 3, 4)).astype('float32') - lod1 = [[0, 1, 3, 5, 7]] + lod1 = [[1, 2, 2, 2]] axis = 0 level = 0 self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.attrs = {'axis': axis, 'level': level} - out_lod = [[0, 2, 5, 8, 11]] + out_lod = [[2, 3, 3, 3]] self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} diff --git a/python/paddle/fluid/tests/unittests/test_seq_conv.py b/python/paddle/fluid/tests/unittests/test_seq_conv.py index 51dbf1f618..9701d9adef 100644 --- a/python/paddle/fluid/tests/unittests/test_seq_conv.py +++ b/python/paddle/fluid/tests/unittests/test_seq_conv.py @@ -75,35 +75,38 @@ class TestSeqProject(OpTest): pading_data = self.pad_data out = np.zeros((self.input_size[0], self.context_length * self.input_size[1])).astype('float32') - lod = lod[0] + offset = [0] + for seq_len in lod[0]: + offset.append(offset[-1] + seq_len) begin_pad = np.max([0, -self.context_start]) - for i in range(len(lod) - 1): + for i in range(len(offset) - 1): for j in range(self.context_length): - in_begin = lod[i] + self.context_start + j - in_end = lod[i + 1] + self.context_start + j - out_begin = lod[i] - out_end = lod[i + 1] - if in_begin < lod[i]: - pad_size = np.min([lod[i] - in_begin, lod[i + 1] - lod[i]]) + in_begin = offset[i] + self.context_start + j + in_end = offset[i + 1] + self.context_start + j + out_begin = offset[i] + out_end = offset[i + 1] + if in_begin < offset[i]: + pad_size = np.min( + [offset[i] - in_begin, offset[i + 1] - offset[i]]) if self.padding_trainable: sub_w = pading_data[j:j + pad_size, :] - out[lod[i]:lod[i] + pad_size, j * self.input_size[1]:( - j + 1) * self.input_size[1]] = sub_w - out_begin = lod[i] + pad_size - in_begin = lod[i] + out[offset[i]:offset[i] + pad_size, j * self.input_size[ + 1]:(j + 1) * self.input_size[1]] = sub_w + out_begin = offset[i] + pad_size + in_begin = offset[i] - if in_end > lod[i + 1]: + if in_end > offset[i + 1]: pad_size = np.min( - [in_end - lod[i + 1], lod[i + 1] - lod[i]]) + [in_end - offset[i + 1], offset[i + 1] - offset[i]]) if self.padding_trainable: sub_w = pading_data[begin_pad + self.context_start + j - pad_size:begin_pad + self.context_start + j, :] - out[lod[i + 1] - pad_size:lod[i + 1], j * self. + out[offset[i + 1] - pad_size:offset[i + 1], j * self. input_size[1]:(j + 1) * self.input_size[1]] = sub_w - in_end = lod[i + 1] - out_end = lod[i + 1] - pad_size + in_end = offset[i + 1] + out_end = offset[i + 1] - pad_size if in_end <= in_begin: continue @@ -175,7 +178,11 @@ class TestSeqProject(OpTest): self.context_stride = 1 self.input_size = [self.input_row, 23] - self.lod = [[0, 4, 5, 8, self.input_row]] + offset_lod = [[0, 4, 5, 8, self.input_row]] + self.lod = [[]] + # convert from offset-based lod to length-based lod + for i in range(len(offset_lod[0]) - 1): + self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) self.output_represention = 8 # output feature size @@ -188,7 +195,11 @@ class TestSeqProjectCase1(TestSeqProject): self.context_stride = 1 self.input_size = [self.input_row, 23] - self.lod = [[0, 4, 5, 8, self.input_row]] + offset_lod = [[0, 4, 5, 8, self.input_row]] + self.lod = [[]] + # convert from offset-based lod to length-based lod + for i in range(len(offset_lod[0]) - 1): + self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) self.output_represention = 8 # output feature size @@ -203,8 +214,12 @@ class TestSeqProjectCase2(TestSeqProject): self.input_size = [self.input_row, 23] idx = range(self.input_size[0]) del idx[0] - self.lod = [[0] + np.sort(random.sample(idx, 8)).tolist() + - [self.input_size[0]]] + offset_lod = [[0] + np.sort(random.sample(idx, 8)).tolist() + + [self.input_size[0]]] + self.lod = [[]] + # convert from offset-based lod to length-based lod + for i in range(len(offset_lod[0]) - 1): + self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i]) self.output_represention = 8 # output feature size diff --git a/python/paddle/fluid/tests/unittests/test_seq_pool.py b/python/paddle/fluid/tests/unittests/test_seq_pool.py index 2e48ef0e88..0b3659d7a6 100644 --- a/python/paddle/fluid/tests/unittests/test_seq_pool.py +++ b/python/paddle/fluid/tests/unittests/test_seq_pool.py @@ -18,26 +18,34 @@ from op_test import OpTest class TestSeqAvgPool(OpTest): + def convert_to_offset(self, lod): + offset = [[0] for i in lod] + for i, level in enumerate(lod): + for seq_len in level: + offset[i].append(offset[i][-1] + seq_len) + return offset + def set_data(self): self.op_type = 'sequence_pool' # one level, batch size is 4 x = np.random.uniform(0.1, 1, [11, 23]).astype('float32') - lod = [[0, 4, 5, 8, 11]] + lod = [[4, 1, 3, 3]] self.inputs = {'X': (x, lod)} + offset = self.convert_to_offset(lod) out = np.zeros((4, 23)).astype('float32') self.outputs = {'Out': out} - return x, lod, out + return x, offset, out - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "AVERAGE"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] out[i] = sub_x.mean(axis=0) def setUp(self): - x, lod, out = self.set_data() - self.compute(x, lod, out) + x, offset, out = self.set_data() + self.compute(x, offset, out) def test_check_output(self): self.check_output() @@ -50,10 +58,10 @@ class TestSeqAvgPool(OpTest): class TestSeqSumPool(TestSeqAvgPool): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "SUM"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] out[i] = sub_x.sum(axis=0) @@ -61,46 +69,47 @@ class TestSeqMaxPool(TestSeqAvgPool): def set_data(self): self.op_type = 'sequence_pool' x = np.random.uniform(0.1, 1, [13, 23]).astype('float32') - lod = [[0, 4, 5, 8, 13]] - for i in range(4): - l = lod[0][i + 1] - lod[0][i] - x[lod[0][i] + np.random.randint(l), :] += 2.0 + lod = [[4, 1, 3, 5]] + offset = self.convert_to_offset(lod) + for i in range(len(offset[0]) - 1): + l = offset[0][i + 1] - offset[0][i] + x[offset[0][i] + np.random.randint(l), :] += 2.0 self.inputs = {'X': (x, lod)} out = np.zeros((4, 23)).astype('float32') self.outputs = {'Out': out} - return x, lod, out + return x, offset, out - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "MAX"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] out[i] = np.amax(sub_x, axis=0) class TestSeqSqrtPool(TestSeqAvgPool): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "SQRT"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - len = lod[0][i + 1] - lod[0][i] - out[i] = sub_x.sum(axis=0) / np.sqrt(len) + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] + seq_len = offset[0][i + 1] - offset[0][i] + out[i] = sub_x.sum(axis=0) / np.sqrt(seq_len) class TestSeqLastPool(TestSeqAvgPool): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "LAST"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] out[i] = sub_x[-1, :] class TestSeqFirstPool(TestSeqAvgPool): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "FIRST"} - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] + for i in range(len(offset[0]) - 1): + sub_x = x[offset[0][i]:offset[0][i + 1], :] out[i] = sub_x[0, :] @@ -109,35 +118,39 @@ class TestSeqAvgPool2D(TestSeqAvgPool): self.op_type = 'sequence_pool' # one level, batch size is 4 x = np.random.uniform(0.1, 1, [13, 3, 17]).astype('float32') - lod = [[0, 4, 5, 8, 13]] + lod = [[4, 1, 3, 5]] self.inputs = {'X': (x, lod)} + offset = self.convert_to_offset(lod) out = np.zeros((4, 3, 17)).astype('float32') self.outputs = {'Out': out} - return x, lod, out + return x, offset, out - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "AVERAGE"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 17)) out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) class TestSeqSumPool2D(TestSeqAvgPool2D): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "SUM"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) class TestSeqSqrtPool2D(TestSeqAvgPool2D): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "SQRT"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) - len = lod[0][i + 1] - lod[0][i] - out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 17)) + seq_len = offset[0][i + 1] - offset[0][i] + out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(seq_len), (3, 17)) def test_check_grad(self): # Remove MaxIndex after check_grad is refined. @@ -150,36 +163,40 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): def set_data(self): self.op_type = 'sequence_pool' x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32') - lod = [[0, 4, 5, 8, 13]] + lod = [[4, 1, 3, 5]] self.inputs = {'X': (x, lod)} - for i in range(4): - l = lod[0][i + 1] - lod[0][i] - x[lod[0][i] + np.random.randint(l), :] += 1.0 + offset = self.convert_to_offset(lod) + for i in range(len(offset[0]) - 1): + l = offset[0][i + 1] - offset[0][i] + x[offset[0][i] + np.random.randint(l), :] += 1.0 out = np.zeros((4, 3, 11)).astype('float32') self.outputs = {'Out': out} - return x, lod, out + return x, offset, out - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "MAX"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 11)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 11)) out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) class TestSeqLastPool2D(TestSeqAvgPool2D): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "LAST"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 17)) out[i] = np.reshape(sub_x[-1, :], (3, 17)) class TestSeqFirstPool2D(TestSeqAvgPool2D): - def compute(self, x, lod, out): + def compute(self, x, offset, out): self.attrs = {'pooltype': "FIRST"} - for i in range(4): - sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) + for i in range(len(offset[0]) - 1): + sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :], + (-1, 3 * 17)) out[i] = np.reshape(sub_x[0, :], (3, 17)) diff --git a/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py b/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py index ebab77e804..8f0765277a 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_erase_op.py @@ -18,15 +18,17 @@ from op_test import OpTest def sequence_erase(in_seq, lod0, tokens): - new_lod0 = [0] + new_lod0 = [] out_seq = [] - for i in range(0, len(lod0) - 1): + offset = 0 + for i in range(0, len(lod0)): num_out = 0 - for dat in in_seq[lod0[i]:lod0[i + 1]]: + for dat in in_seq[offset:(offset + lod0[i])]: if dat not in tokens: out_seq.append(dat) num_out += 1 - new_lod0.append(new_lod0[-1] + num_out) + offset += lod0[i] + new_lod0.append(num_out) return np.array(out_seq).astype("int32"), new_lod0 @@ -34,7 +36,7 @@ class TestSequenceEraseOpInt32(OpTest): def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") - lod = [[0, 9, 13, 24, 30]] + lod = [[9, 4, 11, 6]] tokens = [2, 3, 5] out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) self.attrs = {'tokens': tokens} @@ -49,7 +51,7 @@ class TestSequenceEraseOpInt64(OpTest): def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int64") - lod = [[0, 9, 13, 24, 30]] + lod = [[9, 4, 11, 6]] tokens = [2, 3, 5] out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) self.attrs = {'tokens': tokens} @@ -64,7 +66,7 @@ class TestSequenceEraseOpEmpty(OpTest): def setUp(self): self.op_type = "sequence_erase" in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") - lod = [[0, 9, 13, 24, 30]] + lod = [[9, 4, 11, 6]] tokens = [] out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) self.attrs = {'tokens': tokens} diff --git a/python/paddle/fluid/tests/unittests/test_sequence_expand.py b/python/paddle/fluid/tests/unittests/test_sequence_expand.py index 4c8ec1426c..0bbd31814e 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_expand.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_expand.py @@ -21,7 +21,7 @@ class TestSequenceExpand(OpTest): def set_data(self): x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32') - y_lod = [[0, 1, 4, 8]] + y_lod = [[1, 3, 4]] self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} def compute(self): @@ -37,23 +37,27 @@ class TestSequenceExpand(OpTest): out = np.zeros(shape=((0, ) + x_data.shape[1:]), dtype=x_data.dtype) if x_lod is None: - x_idx = [i for i in xrange(x_data.shape[0] + 1)] + # x_idx = [i for i in xrange(x_data.shape[0] + 1)] + x_idx = [1] * x_data.shape[0] else: x_idx = x_lod[0] - out_lod = [[0]] + out_lod = [[]] + + offset = 0 + for i in xrange(len(y_lod[ref_level])): + repeat_num = y_lod[ref_level][i] + x_len = x_idx[i] - for i in xrange(1, len(y_lod[ref_level])): - repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1] - x_len = x_idx[i] - x_idx[i - 1] if repeat_num > 0: - x_sub = x_data[x_idx[i - 1]:x_idx[i], :] + x_sub = x_data[offset:(offset + x_len), :] stacked_x_sub = x_sub for r in range(repeat_num - 1): stacked_x_sub = np.vstack((stacked_x_sub, x_sub)) out = np.vstack((out, stacked_x_sub)) if x_lod is not None: for j in xrange(repeat_num): - out_lod[0].append(out_lod[0][-1] + x_len) + out_lod[0].append(x_len) + offset += x_len if x_lod is None: self.outputs = {'Out': out} @@ -75,9 +79,9 @@ class TestSequenceExpand(OpTest): class TestSequenceExpandCase1(TestSequenceExpand): def set_data(self): x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32') - x_lod = [[0, 2, 5]] + x_lod = [[2, 3]] y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32') - y_lod = [[0, 2, 5], [0, 2, 4, 7, 10, 13]] + y_lod = [[2, 3], [2, 2, 3, 3, 3]] self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} self.attrs = {'ref_level': 0} @@ -85,9 +89,9 @@ class TestSequenceExpandCase1(TestSequenceExpand): class TestSequenceExpandCase2(TestSequenceExpand): def set_data(self): x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32') - x_lod = [[0, 1]] + x_lod = [[1]] y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32') - y_lod = [[0, 2], [0, 2]] + y_lod = [[2], [1, 1]] self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} self.attrs = {'ref_level': 0} @@ -95,9 +99,9 @@ class TestSequenceExpandCase2(TestSequenceExpand): class TestSequenceExpandCase3(TestSequenceExpand): def set_data(self): x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32') - x_lod = [[0, 1, 2, 3, 4]] - y_data = np.random.uniform(0.1, 1, [6, 1]).astype('float32') - y_lod = [[0, 2, 4, 4, 6]] + x_lod = [[1, 1, 1, 1]] + y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32') + y_lod = [[2, 2, 2, 2]] self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} @@ -105,9 +109,9 @@ class TestSequenceExpandCase4(TestSequenceExpand): def set_data(self): data = np.random.uniform(0.1, 1, [5 * 2, 1]) x_data = np.array(data).reshape([5, 2]).astype('float32') - x_lod = [[0, 2, 5]] - y_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32') - y_lod = [[0, 1, 3], [0, 1, 3]] + x_lod = [[2, 3]] + y_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32') + y_lod = [[2], [2, 3]] self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} diff --git a/python/paddle/fluid/tests/unittests/test_sequence_reshape.py b/python/paddle/fluid/tests/unittests/test_sequence_reshape.py index efeab56039..68f2e5eba3 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_reshape.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_reshape.py @@ -22,7 +22,7 @@ class TestSequenceReshape(OpTest): def setUp(self): self.op_type = 'sequence_reshape' dimension = 12 - x_lod = [[0, 4, 5, 8, 11]] + x_lod = [[4, 1, 3, 3]] x = np.random.uniform(0.1, 1, [11, 24]).astype('float32') self.inputs = {'X': (x, x_lod)} @@ -34,13 +34,13 @@ class TestSequenceReshape(OpTest): def compute_output(self, x, x_lod, dimension): x_width = x.shape[1] - out_lod = [[0]] - for i in xrange(len(x_lod[0]) - 1): - seq_len = x_lod[0][i + 1] - x_lod[0][i] + out_lod = [[]] + for i in xrange(len(x_lod[0])): + seq_len = x_lod[0][i] offset = (seq_len * x_width) / dimension assert int(offset) * dimension == seq_len * x_width - out_lod[0].append(out_lod[0][-1] + int(offset)) - out = np.zeros(shape=(out_lod[0][-1], dimension)).astype('float32') + out_lod[0].append(int(offset)) + out = np.zeros(shape=(sum(out_lod[0]), dimension)).astype('float32') out.ravel()[:] = x.ravel()[:] return out, out_lod @@ -55,7 +55,7 @@ class TestSequenceReshape_reduce(TestSequenceReshape): def setUp(self): self.op_type = 'sequence_reshape' dimension = 24 - x_lod = [[0, 4, 6, 8, 12]] + x_lod = [[4, 2, 2, 4]] x = np.random.uniform(0.1, 1, [12, 12]).astype('float32') self.inputs = {'X': (x, x_lod)} @@ -70,7 +70,7 @@ class TestSequenceReshape_same(TestSequenceReshape): def setUp(self): self.op_type = 'sequence_reshape' dimension = 12 - x_lod = [[0, 4, 6, 8, 12]] + x_lod = [[4, 2, 2, 4]] x = np.random.uniform(0.1, 1, [12, 12]).astype('float32') self.inputs = {'X': (x, x_lod)} diff --git a/python/paddle/fluid/tests/unittests/test_sequence_slice_op.py b/python/paddle/fluid/tests/unittests/test_sequence_slice_op.py index 660b4a171d..313e485d1e 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_slice_op.py @@ -29,20 +29,20 @@ class TestSequenceSliceOp(OpTest): self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length} outs = [] #np.zeros((100, 3, 2)).astype('float32') - out_lod = [[0]] - out_lod_offset = 0 + out_lod = [[]] + lod_offset = 0 for i in range(len(offset)): - sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] + + sub_x = x[lod_offset + offset[i, 0]:lod_offset + offset[i, 0] + length[i, 0], :] - out_lod_offset = out_lod_offset + len(sub_x) outs.append(sub_x) - out_lod[0].append(out_lod_offset) + out_lod[0].append(len(sub_x)) + lod_offset += lod[0][i] outs = np.concatenate(outs, axis=0) self.outputs = {'Out': (outs, out_lod)} def init_test_case(self): self.x_dim = (100, 3, 2) - self.x_lod = [[0, 20, 40, 60, 80, 100]] + self.x_lod = [[20, 20, 20, 20, 20]] self.offset = [[1], [2], [3], [4], [5]] self.length = [[10], [8], [6], [4], [2]] diff --git a/python/paddle/fluid/tests/unittests/test_sequence_softmax_op.py b/python/paddle/fluid/tests/unittests/test_sequence_softmax_op.py index d6dc99bb31..e91a69a0f8 100644 --- a/python/paddle/fluid/tests/unittests/test_sequence_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_sequence_softmax_op.py @@ -26,15 +26,16 @@ class TestSequenceSoftmaxOp(OpTest): self.init_op_type() x = np.random.uniform(0.1, 1, (11, 1)).astype("float32") - lod = [[0, 4, 5, 8, 11]] + lod = [[4, 1, 3, 3]] out = np.zeros((11, 1)).astype("float32") - for i in range(4): - sub_x = x[lod[0][i]:lod[0][i + 1], :] - sub_x = sub_x.reshape(1, lod[0][i + 1] - lod[0][i]) + offset = 0 + for i in range(len(lod[0])): + sub_x = x[offset:offset + lod[0][i], :] + sub_x = sub_x.reshape(1, lod[0][i]) sub_out = stable_softmax(sub_x) - out[lod[0][i]:lod[0][i + 1], :] = sub_out.reshape( - lod[0][i + 1] - lod[0][i], 1) + out[offset:offset + lod[0][i], :] = sub_out.reshape(lod[0][i], 1) + offset += lod[0][i] self.inputs = {"X": (x, lod)} self.outputs = {"Out": out} diff --git a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py index 1d93230e7b..b779f0fb01 100644 --- a/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py +++ b/python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py @@ -54,12 +54,12 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): def test_refer_lod(self): cpu = core.CPUPlace() x_tensor = core.LoDTensor() - x_tensor.set_lod([[0, 2, 5, 6]]) + x_tensor.set_recursive_sequence_lengths([[2, 3, 1]]) tensor_np = np.random.random(size=(6, 100)).astype('float32') x_tensor.set(tensor_np, cpu) rank_table_tensor = core.LoDTensor() - rank_table_tensor.set_lod([[0, 1, 3, 6]]) + rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), cpu) @@ -83,7 +83,7 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): x_tensor.set(tensor_np, cpu) rank_table_tensor = core.LoDTensor() - rank_table_tensor.set_lod([[0, 1, 3, 6]]) + rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]]) rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), cpu) diff --git a/python/paddle/fluid/tests/unittests/test_simple_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_simple_dist_transpiler.py deleted file mode 100644 index f4aa7426bc..0000000000 --- a/python/paddle/fluid/tests/unittests/test_simple_dist_transpiler.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np - -import paddle.fluid as fluid -from paddle.fluid.transpiler.distribute_transpiler import delete_ops - -from transpiler_test import TranspilerTest - - -class TestSimpleDistTranspiler(TranspilerTest): - def setUp(self): - self.current_pserver_ep = "127.0.0.1:6175" - - def test_simple_transpiler(self): - np.random.seed(1) - - trainer = self.get_trainer() - pserver, startup = self.get_pserver(self.current_pserver_ep) - self.assertEqual([op.type for op in trainer.global_block().ops], - self.get_expect_trainer_ops()) - - self.assertEqual(len(pserver.blocks), 2) - # block0: listen_and_serv - self.assertEqual([op.type for op in pserver.blocks[0].ops], - ["listen_and_serv"]) - # block1: optimize pass - self.assertEqual([op.type for op in pserver.blocks[1].ops], - ["sum", "scale", "sgd"]) - - # confirm startup program - self.assertEqual([op.type for op in startup.global_block().ops], - ["fill_constant", "uniform_random", "uniform_random"]) - - # the variable #fc_w will NOT be splited - fc_w_var = startup.global_block().var("fc_w@GRAD") - self.assertEqual(fc_w_var.shape, (1000, 1000)) - - fc_w_var = startup.global_block().var("fc_w@GRAD.trainer_0") - self.assertEqual(fc_w_var.shape, (1000, 1000)) - - def get_expect_trainer_ops(self): - trainer = fluid.Program() - - with fluid.program_guard(trainer): - optimize_ops, params_grads = self.net_conf() - - delete_ops(trainer.global_block(), optimize_ops) - ops = [op.type for op in trainer.global_block().ops] + [ - "send", "send_barrier", "recv", "recv", "fetch_barrier" - ] - ops.insert(ops.index("elementwise_add_grad") + 1, "send") - return ops - - def _transpiler_instance(self): - main = self.get_main_program() - t = fluid.DistributeTranspiler() - t.transpile( - self.trainer_id, - program=main, - pservers=self.pserver_eps, - trainers=self.trainers, - slice_var_up=False) - return t - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py index 02cc7da849..0916ed7c9f 100644 --- a/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py @@ -56,7 +56,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): def test_split_and_merge_lod_tensor_level_0(self): tensor = core.LoDTensor() tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) - tensor.set_lod([[0, 3, 9, 10]]) + tensor.set_recursive_sequence_lengths([[3, 6, 1]]) mask_np = np.array([0, 1, 0]).astype('bool') mask_np = np.expand_dims(mask_np, axis=1) @@ -68,15 +68,15 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) expect_true = core.LoDTensor() expect_true.set(expect_true_tensor, self.place()) - expect_true.set_lod([[0, 6]]) + expect_true.set_recursive_sequence_lengths([[6]]) expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32') expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) - expect_false_lod = [[0, 3, 4]] + expect_false_lod = [[3, 1]] expect_false = core.LoDTensor() expect_false.set(expect_false_tensor, self.place()) - expect_false.set_lod(expect_false_lod) + expect_false.set_recursive_sequence_lengths(expect_false_lod) self.main( tensor=tensor, @@ -126,7 +126,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): def check_tensor_same(self, actual, expect): self.assertTrue(np.allclose(np.array(actual), np.array(expect))) - self.assertEqual(actual.lod(), expect.lod()) + self.assertEqual(actual.recursive_sequence_lengths(), + expect.recursive_sequence_lengths()) class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): @@ -151,7 +152,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): tensor = core.LoDTensor() tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) - tensor.set_lod([[0, 3, 9, 10]]) + tensor.set_recursive_sequence_lengths([[3, 6, 1]]) mask_np = np.array([0, 1, 0]).astype('bool') mask_np = np.expand_dims(mask_np, axis=1) diff --git a/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py b/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py new file mode 100644 index 0000000000..7956897d68 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py @@ -0,0 +1,26 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from test_sum_op import TestSumOp + + +class TestMKLDNN(TestSumOp): + def init_kernel_type(self): + self.use_mkldnn = True + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 2faf5b1064..1d90414e13 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -20,12 +20,15 @@ from op_test import OpTest class TestSumOp(OpTest): def setUp(self): self.op_type = "sum" + self.use_mkldnn = False + self.init_kernel_type() x0 = np.random.random((3, 4)).astype('float32') x1 = np.random.random((3, 4)).astype('float32') x2 = np.random.random((3, 4)).astype('float32') self.inputs = {"X": [("x0", x0), ("x1", x1), ("x2", x2)]} y = x0 + x1 + x2 self.outputs = {'Out': y} + self.attrs = {'use_mkldnn': self.use_mkldnn} def test_check_output(self): self.check_output() @@ -33,6 +36,9 @@ class TestSumOp(OpTest): def test_check_grad(self): self.check_grad(['x0'], 'Out') + def init_kernel_type(self): + pass + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_target_assign_op.py b/python/paddle/fluid/tests/unittests/test_target_assign_op.py index ccb41e56c5..bd20889752 100644 --- a/python/paddle/fluid/tests/unittests/test_target_assign_op.py +++ b/python/paddle/fluid/tests/unittests/test_target_assign_op.py @@ -22,22 +22,23 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod): if len(gt_lod) != len(neg_lod): raise AssertionError("The input arguments are illegal.") - batch_size = len(gt_lod) - 1 + batch_size = len(gt_lod) match_indices = -1 * np.ones((batch_size, num_prior)).astype('int32') - neg_indices = np.zeros((neg_lod[-1], 1)).astype('int32') + neg_indices = np.zeros((sum(neg_lod), 1)).astype('int32') + offset = 0 for n in range(batch_size): - gt_num = gt_lod[n + 1] - gt_lod[n] + gt_num = gt_lod[n] ids = random.sample([i for i in range(num_prior)], gt_num) match_indices[n, ids] = [i for i in range(gt_num)] ret_ids = set([i for i in range(num_prior)]) - set(ids) - s = neg_lod[n] - e = neg_lod[n + 1] - l = e - s + l = neg_lod[n] neg_ids = random.sample(ret_ids, l) - neg_indices[s:e, :] = np.array(neg_ids).astype('int32').reshape(l, 1) + neg_indices[offset:offset + neg_lod[n], :] = np.array(neg_ids).astype( + 'int32').reshape(l, 1) + offset += neg_lod[n] return match_indices, neg_indices @@ -56,24 +57,28 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, # init weight for target label trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32') + gt_offset = 0 + neg_offset = 0 for i in range(batch_size): cur_indices = match_indices[i] col_ids = np.where(cur_indices > -1) col_val = cur_indices[col_ids] - gt_start = gt_lod[i] # target bbox - for v, c in zip(col_val + gt_start, col_ids[0].tolist()): + for v, c in zip(col_val + gt_offset, col_ids[0].tolist()): trg_box[i][c][:] = encoded_box[v][c][:] # weight for target bbox trg_box_wt[i][col_ids] = 1.0 - trg_label[i][col_ids] = gt_label[col_val + gt_start] + trg_label[i][col_ids] = gt_label[col_val + gt_offset] trg_label_wt[i][col_ids] = 1.0 # set target label weight to 1.0 for the negative samples if neg_indices is not None: - neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]] + neg_ids = neg_indices[neg_offset:neg_offset + neg_lod[i]] trg_label_wt[i][neg_ids] = 1.0 + # update offset + gt_offset += gt_lod[i] + neg_offset += neg_lod[i] return trg_box, trg_box_wt, trg_label, trg_label_wt @@ -83,11 +88,11 @@ class TestTargetAssginFloatType(OpTest): self.op_type = "target_assign" num_prior = 120 num_class = 21 - gt_lod = [0, 5, 11, 23] - neg_lod = [0, 4, 7, 13] + gt_lod = [5, 6, 12] + neg_lod = [4, 3, 6] mismatch_value = 0 - batch_size = len(gt_lod) - 1 - num_gt = gt_lod[-1] + batch_size = len(gt_lod) + num_gt = sum(gt_lod) encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') gt_label = np.random.randint( @@ -121,11 +126,11 @@ class TestTargetAssginIntType(OpTest): self.op_type = "target_assign" num_prior = 120 num_class = 21 - gt_lod = [0, 5, 11, 23] - neg_lod = [0, 4, 7, 13] + gt_lod = [5, 6, 12] + neg_lod = [4, 3, 6] mismatch_value = 0 - batch_size = len(gt_lod) - 1 - num_gt = gt_lod[-1] + batch_size = len(gt_lod) + num_gt = sum(gt_lod) encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') gt_label = np.random.randint( diff --git a/python/paddle/fluid/tests/unittests/test_tensor.py b/python/paddle/fluid/tests/unittests/test_tensor.py index 379081c328..f17edd3025 100644 --- a/python/paddle/fluid/tests/unittests/test_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_tensor.py @@ -69,15 +69,14 @@ class TestTensor(unittest.TestCase): array[0, 0, 0] = 3 array[3, 3, 5] = 10 lod_tensor.set(array, place) - lod_tensor.set_lod([[0, 2, 4]]) + lod_tensor.set_recursive_sequence_lengths([[2, 2]]) lod_v = numpy.array(lod_tensor) self.assertTrue(numpy.alltrue(array == lod_v)) - lod = lod_tensor.lod() - self.assertEqual(0, lod[0][0]) + lod = lod_tensor.recursive_sequence_lengths() + self.assertEqual(2, lod[0][0]) self.assertEqual(2, lod[0][1]) - self.assertEqual(4, lod[0][2]) def test_float_lod_tensor(self): place = core.CPUPlace() @@ -97,21 +96,21 @@ class TestTensor(unittest.TestCase): lod_v = numpy.array(lod_tensor) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) - self.assertEqual(len(lod_tensor.lod()), 0) + self.assertEqual(len(lod_tensor.recursive_sequence_lengths()), 0) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] - lod_tensor.set_lod(lod_py) - lod = lod_tensor.lod() + lod_py = [[2, 1], [1, 2, 2]] + lod_tensor.set_recursive_sequence_lengths(lod_py) + lod = lod_tensor.recursive_sequence_lengths() self.assertListEqual(lod_py, lod) def test_lod_tensor_init(self): scope = core.Scope() place = core.CPUPlace() - lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() lod_tensor.set_dims([5, 2, 3, 4]) - lod_tensor.set_lod(lod_py) + lod_tensor.set_recursive_sequence_lengths(lod_py) lod_tensor.alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 @@ -121,17 +120,17 @@ class TestTensor(unittest.TestCase): lod_v = numpy.array(lod_tensor) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) - self.assertListEqual(lod_py, lod_tensor.lod()) + self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths()) def test_lod_tensor_gpu_init(self): if not core.is_compiled_with_cuda(): return place = core.CUDAPlace(0) - lod_py = [[0, 2, 5], [0, 2, 4, 5]] + lod_py = [[2, 1], [1, 2, 2]] lod_tensor = core.LoDTensor() lod_tensor.set_dims([5, 2, 3, 4]) - lod_tensor.set_lod(lod_py) + lod_tensor.set_recursive_sequence_lengths(lod_py) lod_tensor.alloc_float(place) tensor_array = numpy.array(lod_tensor) tensor_array[0, 0, 0, 0] = 1.0 @@ -141,7 +140,7 @@ class TestTensor(unittest.TestCase): lod_v = numpy.array(lod_tensor) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) - self.assertListEqual(lod_py, lod_tensor.lod()) + self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths()) def test_empty_tensor(self): place = core.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/test_warpctc_op.py b/python/paddle/fluid/tests/unittests/test_warpctc_op.py index ac638f7836..9f1aaee472 100644 --- a/python/paddle/fluid/tests/unittests/test_warpctc_op.py +++ b/python/paddle/fluid/tests/unittests/test_warpctc_op.py @@ -34,8 +34,8 @@ class CTCForward(object): self.level = 0 self.num_classes = softmax.shape[1] - self.batch_size = len(softmax_lod[self.level]) - 1 - assert self.batch_size == len(labels_lod[self.level]) - 1 + self.batch_size = len(softmax_lod[self.level]) + assert self.batch_size == len(labels_lod[self.level]) self.loss = np.zeros([self.batch_size, 1], dtype="float32") self.gradient = np.zeros(self.softmax.shape, dtype="float32") @@ -156,16 +156,20 @@ class CTCForward(object): return -log_prob def forward(self): + softmax_offset = 0 + labels_offset = 0 for i in range(self.batch_size): - softmax_start_i = self.softmax_lod[self.level][i] - softmax_end_i = self.softmax_lod[self.level][i + 1] - labels_start_i = self.labels_lod[self.level][i] - labels_end_i = self.labels_lod[self.level][i + 1] + softmax_start_i = softmax_offset + softmax_end_i = softmax_offset + self.softmax_lod[self.level][i] + labels_start_i = labels_offset + labels_end_i = labels_offset + self.labels_lod[self.level][i] softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :] labels_a_sequence = self.labels[labels_start_i:labels_end_i, :] self.loss[i] = self.forward_a_sequence(softmax_a_sequence, labels_a_sequence) + softmax_offset += self.softmax_lod[self.level][i] + labels_offset += self.labels_lod[self.level][i] return self.loss @@ -173,8 +177,8 @@ class TestWarpCTCOp(OpTest): def config(self): self.batch_size = 4 self.num_classes = 8 - self.logits_lod = [[0, 4, 5, 8, 11]] - self.labels_lod = [[0, 3, 4, 8, 12]] + self.logits_lod = [[4, 1, 3, 3]] + self.labels_lod = [[3, 1, 4, 4]] self.blank = self.num_classes - 1 self.norm_by_times = False @@ -184,11 +188,13 @@ class TestWarpCTCOp(OpTest): logits = np.random.uniform( 0.1, 1.0, - [self.logits_lod[0][-1], self.num_classes]).astype("float32") + [sum(self.logits_lod[0]), self.num_classes]).astype("float32") softmax = np.apply_along_axis(stable_softmax, 1, logits) # labels should not be blank labels = np.random.randint( - 0, self.num_classes - 1, [self.labels_lod[0][-1], 1], dtype="int32") + 0, + self.num_classes - 1, [sum(self.labels_lod[0]), 1], + dtype="int32") ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, self.blank, self.norm_by_times) @@ -196,9 +202,8 @@ class TestWarpCTCOp(OpTest): max_sequence_length = 0 for i in range(self.batch_size): - max_sequence_length = max( - max_sequence_length, - self.logits_lod[0][i + 1] - self.logits_lod[0][i]) + max_sequence_length = max(max_sequence_length, + self.logits_lod[0][i]) self.gradient = np.zeros( [max_sequence_length, self.batch_size, self.num_classes], dtype="float32") @@ -222,8 +227,8 @@ class TestWarpCTCOpCase1(TestWarpCTCOp): def config(self): self.batch_size = 4 self.num_classes = CUDA_BLOCK_SIZE + 2 - self.logits_lod = [[0, 4, 5, 8, 11]] - self.labels_lod = [[0, 3, 4, 8, 12]] + self.logits_lod = [[4, 1, 3, 3]] + self.labels_lod = [[3, 1, 4, 4]] self.blank = 0 self.norm_by_times = False diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index 2adf917bc5..436f9b9f86 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -76,11 +76,11 @@ class TestWeightNormalization(unittest.TestCase): lod_level_i = numpy.random.randint( low=1, high=5, - size=self.batch_size if i == 0 else lod_level_i[-1]) - lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist() + size=self.batch_size + if i == 0 else sum(lod_level_i)).tolist() data_lod.append(lod_level_i) data_value = numpy.random.random( - size=[data_lod[-1][-1] if data_lod else self.batch_size + size=[sum(data_lod[-1]) if data_lod else self.batch_size ] + data_shape).astype('float32') self.data[data_name] = (data_value, data_lod) @@ -90,7 +90,7 @@ class TestWeightNormalization(unittest.TestCase): tensor = fluid.Tensor() tensor.set(self.data[desc[0]][0], place) if self.data[desc[0]][1]: - tensor.set_lod(self.data[desc[0]][1]) + tensor.set_recursive_sequence_lengths(self.data[desc[0]][1]) self.inputs[desc[0]] = tensor def weight_normalize(self): diff --git a/python/paddle/fluid/tests/unittests/testsuite.py b/python/paddle/fluid/tests/unittests/testsuite.py index 1dc94a80c9..a995ee10f2 100644 --- a/python/paddle/fluid/tests/unittests/testsuite.py +++ b/python/paddle/fluid/tests/unittests/testsuite.py @@ -22,7 +22,7 @@ def as_lodtensor(np_array, lod, place): tensor = core.LoDTensor() tensor.set(np_value, place) if lod is not None: - tensor.set_lod(lod) + tensor.set_recursive_sequence_lengths(lod) return tensor @@ -73,7 +73,7 @@ def set_input(scope, op, inputs, place): if isinstance(var, tuple) or isinstance(var, np.ndarray): tensor = scope.find_var(var_name).get_tensor() if isinstance(var, tuple): - tensor.set_lod(var[1]) + tensor.set_recursive_sequence_lengths(var[1]) var = var[0] tensor.set_dims(var.shape) tensor.set(var, place) diff --git a/python/paddle/fluid/tests/unittests/transpiler_test.py b/python/paddle/fluid/tests/unittests/transpiler_test.py deleted file mode 100644 index d84c5d9c41..0000000000 --- a/python/paddle/fluid/tests/unittests/transpiler_test.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest -import numpy as np - -import paddle.fluid as fluid -import paddle.fluid.core as core -import paddle.fluid.layers as layers - - -class TranspilerTest(unittest.TestCase): - @classmethod - def setUpClass(self): - self.trainer_id = 0 - self.trainers = 2 - self.pservers = 2 - self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175" - - def net_conf(self): - x = fluid.layers.data(name='x', shape=[1000], dtype='float32') - - y_predict = fluid.layers.fc(input=x, - size=1000, - act=None, - param_attr=fluid.ParamAttr(name='fc_w')) - - y = fluid.layers.data(name='y', shape=[1], dtype='float32') - - cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(cost) - sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1) - - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) - return optimize_ops, params_grads - - def get_main_program(self): - main = fluid.Program() - - with fluid.program_guard(main): - self.net_conf() - - return main - - def get_trainer(self): - return self._transpiler_instance().get_trainer_program() - - def get_pserver(self, ep): - t = self._transpiler_instance() - pserver = t.get_pserver_program(ep) - startup = t.get_startup_program(ep, pserver) - return pserver, startup - - def _transpiler_instance(self): - main = self.get_main_program() - t = fluid.DistributeTranspiler() - t.transpile( - self.trainer_id, - program=main, - pservers=self.pserver_eps, - trainers=self.trainers) - return t diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index efc28d8993..b6e0241265 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -33,23 +33,59 @@ __all__ = [ class BeginEpochEvent(object): + """ + The begin of a training epoch. + + Args: + epoch_id(int): The current epoch ID. + """ + def __init__(self, epoch_id): self.epoch = epoch_id class EndEpochEvent(object): + """ + The end of a training epoch. + + Args: + epoch_id(int): The current epoch ID. + """ + def __init__(self, epoch_id): self.epoch = epoch_id class BeginStepEvent(object): + """ + The begin of a training epoch. + + Args: + epoch_id(int): The current epoch ID. + step_id(int): The current step ID. + """ + def __init__(self, epoch_id, step_id): self.epoch = epoch_id self.step = step_id self.fetch_metrics = True + """ + If fetch_metrics is true, the metrics will be fetched at the + EndStepEvent. Default is True. + """ class EndStepEvent(object): + """ + The end of a training step. + + Args: + epoch_id(int): The current epoch ID. + step_id(int): The current step ID. + metrics(list): A list of fetched tensor. The order of this list is same + as the :code:`train_func` returns. + """ + def __init__(self, epoch_id, step_id, metrics): self.epoch = epoch_id self.step = step_id @@ -57,32 +93,46 @@ class EndStepEvent(object): class CheckpointConfig(object): + """ + Parameter object for :code:`fluid.io.save_checkpoint` and + :code:`fluid.Trainer`. Used to configuration how to save checkpoint. + + Args: + checkpoint_dir(str): Directory path to save check point. Default is the + current directory. + + max_num_checkpoints(int): The max number of local check points. + epoch_interval(int): Every number of epoch to save check point. + step_interval(int): Every number of step to save check point. + + Examples: + >>> config = fluid.CheckpointConfig("./checkpoints") + >>> trainer = fluid.Trainer(train_func=train_program, + >>> place=place, + >>> optimizer_func=optimizer_func, + >>> checkpoint_config=config) + >>> trainer.train(...) + """ + def __init__(self, checkpoint_dir=None, max_num_checkpoints=3, epoch_interval=1, step_interval=10): - if checkpoint_dir is None: - self.checkpoint_dir = os.getcwd() - else: - self.checkpoint_dir = checkpoint_dir - - self.max_num_checkpoints = max_num_checkpoints - if epoch_interval < 1: - self.epoch_interval = 1 - else: - self.epoch_interval = epoch_interval - - if step_interval < 1: - self.step_interval = 10 - else: - self.step_interval = step_interval + assert epoch_interval >= 1 + assert step_interval >= 1 + self.checkpoint_dir = checkpoint_dir \ + if checkpoint_dir is not None else os.getcwd() + self.max_num_checkpoints = max_num_checkpoints + self.epoch_interval = epoch_interval + self.step_interval = step_interval self.epoch_id = 0 self.step_id = 0 self.load_serial = None - self.is_pserver = False + self.pserver_id = None + self.lookup_table_name = None def check_and_get_place(place): @@ -113,11 +163,62 @@ def check_and_get_place(place): class Trainer(object): """ + A trainer wraps MultiGPU/MultiNode training loops and can be used to train a + simple neural network easily. + + This API takes a :code:`train_func`. A :code:`train_func` is a function that + return loss as it first return value. The reset value can be fetched by + EndStepEvent.metrics + + This API also takes a :code:`optimizer_func` that will return an optimizer + instance. + + For example, to train a MLP for MNIST dataset, the sample program is + + >>> import paddle.fluid as fluid + >>> + >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): + >>> hidden = image + >>> for layer_size in layer_sizes: + >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) + >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") + >>> + >>> def train_mnist_mlp(): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> prediction = mlp(img) + >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) + >>> + >>> def optimizer(): + >>> return fluid.optimizer.Adam() + >>> + >>> trainer = Trainer(train_func=train_mnist_mlp, + >>> optimizer_func=optimizer, + >>> place=fluid.CUDAPlace(0), + >>> parallel=True) + >>> + >>> def train_callback(event): + >>> if isinstance(event, fluid.EndStepEvent): + >>> print "Epoch ID", event.epoch, "Step ID",\ + >>> event.step, "AvgLoss", event.metrics[0] + >>> elif isinstance(event, fluid.EndEpochEvent): + >>> trainer.save_params("./model_{0}".format(event.epoch)) + >>> + >>> trainer.train(num_epochs=100, event_handler=train_callback) + + For more example, please see :ref:`api_guide_high_level_api`. + Args: - train_func(callable): A function which will return loss. The loss must be a scalar. + train_func(callable): A function which will return loss. The loss must be + a scalar tensor. optimizer_func(callable): A function that returns an Optimizer object. - place: The device place of this trainer. + place(CUDAPlace|CPUPlace): The device place of this trainer. If + :code:`parallel=True,` all CUDA Places will be used if :code:`place` + is a :code:`CUDAPlace`. + parallel(bool): True if use multiple devices. + checkpoint_config(CheckpointConfig): Configuration about how to save + checkpoints. """ def __init__(self, @@ -129,9 +230,6 @@ class Trainer(object): checkpoint_config=None): self.__stop = False self.parallel = parallel - # 1. we need to generate a framework.Program by calling - # program_func. Reference: fluid.program_guard in - # test_word2vec.py # config for checkpoint # only chief worker will save variables @@ -145,6 +243,10 @@ class Trainer(object): self.scope = core.Scope() + # 1. we need to generate a framework.Program by calling + # program_func. Reference: fluid.program_guard in + # test_word2vec.py + self.startup_program = framework.Program() self.train_program = framework.Program() @@ -181,13 +283,20 @@ class Trainer(object): self.checkpoint_cfg.load_serial, self.startup_program) - if not self.checkpoint_cfg.is_pserver: - epoch_id, step_id = io.load_trainer_args( - self.checkpoint_cfg.checkpoint_dir, - self.checkpoint_cfg.load_serial, self.trainer_id, - self._get_checkpoint_load_args()) - self.checkpoint_cfg.epoch_id = int(epoch_id) - self.checkpoint_cfg.step_id = int(step_id) + if not self.checkpoint_cfg.pserver_id: + epoch_id, step_id = io.load_trainer_args( + self.checkpoint_cfg.checkpoint_dir, + self.checkpoint_cfg.load_serial, self.trainer_id, + self._get_checkpoint_load_args()) + self.checkpoint_cfg.epoch_id = int(epoch_id) + self.checkpoint_cfg.step_id = int(step_id) + else: + if self.checkpoint_cfg.lookup_table_name: + io.load_lookup_table_vars( + exe, self.checkpoint_cfg.checkpoint_dir, + self.startup_program, + self.checkpoint_cfg.pserver_id, + self.checkpoint_cfg.lookup_table_name) if param_path and os.path.isdir(param_path): # load params from param_path into scope @@ -206,7 +315,7 @@ class Trainer(object): for ip in worker_ips.split(","): worker_endpoints.append(':'.join([ip, port])) self.num_trainers = len(worker_endpoints) - current_endpoint = os.getenv("POD_IP") + ":" + port + current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port worker_endpoints.remove(current_endpoint) # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id # in ParallelExecutor to start @@ -257,7 +366,10 @@ class Trainer(object): self.trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": if self.checkpoint_cfg: - self.is_pserver = True + pserver_id = eplist.index(current_endpoint) + self.checkpoint_cfg.pserver_id = pserver_id + if t.has_distributed_lookup_table: + self.checkpoint_cfg.lookup_table_name = t.table_name self.train_program = t.get_pserver_program(current_endpoint) self.startup_program = t.get_startup_program(current_endpoint, @@ -277,17 +389,18 @@ class Trainer(object): def train(self, num_epochs, event_handler, reader=None, feed_order=None): """ - Train the model. + Start the train loop to train the model. Args: - num_epochs: The number of epoch. An epoch will process all data in reader - event_handler: The event handler. A function with type (ev:Event)->void - reader: - feed_order: Feeding order of reader. None will following the defining + num_epochs(int): The number of epoch. An epoch will process all data in reader + event_handler(callable): The event handler. A function with type (ev:Event)->void + reader(callable): A reader creator object. See also + :ref:`api_guide_python_reader` . + feed_order(list): Feeding order of reader. None will following the defining order in program Returns: - + None """ training_role = os.getenv("PADDLE_TRAINING_ROLE", "") if training_role == "PSERVER": @@ -307,16 +420,24 @@ class Trainer(object): Test the model on given test data Args: - reader: The reader that yields test data. - feed_order: Feeding order of reader. None will following the defining - order in program + reader(callable): The reader that yields test data. + feed_order(list): Feeding order of reader. None will following the + defining order in program """ return self._test_by_executor(reader, feed_order, self.train_func_outputs) def save_params(self, param_path): - # reference: save_persistables in io.py + """ + Save all parameters into :code:`param_path`. + + Args: + param_path(str): The path to save parameters. + + Returns: + None + """ with self._prog_and_scope_guard(): exe = executor.Executor(self.place) io.save_persistables(exe, dirname=param_path) @@ -448,7 +569,8 @@ class Trainer(object): def _save_checkpoint(self, epoch_id, step_id): assert self.checkpoint_cfg - if epoch_id % self.checkpoint_cfg.epoch_interval == 0 and step_id % self.checkpoint_cfg.step_interval == 0: + if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \ + and step_id % self.checkpoint_cfg.step_interval == 0: exe = executor.Executor(self.place) io.save_checkpoint( executor=exe, diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 9c604170b8..53d6ca86a0 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -12,19 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. """ -Transpile the program to distributed data-parallelism programs. -The main_program will be transformed to use a remote parameter server -to do parameter optimization. And the optimization graph will be put -into a parameter server program. - -Use different methods to split trainable variables to different -parameter servers. - Steps to transpile trainer: 1. split variable to multiple blocks, aligned by product(dim[1:]) (width). 2. rename splited grad variables to add trainer_id suffix ".trainer_%d". 3. modify trainer program add split_op to each grad variable. -4. append send_op to send splited variables to server and +4. append send_op to send splited variables to server and 5. add recv_op to fetch params(splited blocks or origin param) from server. 6. append concat_op to merge splited blocks to update local weights. @@ -44,7 +36,7 @@ import numpy as np from ps_dispatcher import RoundRobin, HashName, PSDispatcher from .. import core, framework from ..framework import Program, default_main_program, \ - default_startup_program, \ + default_startup_program, Block, \ Variable, Parameter, grad_var_name from details import * @@ -117,129 +109,41 @@ def slice_variable(var_list, slice_count, min_block_size=8192): return blocks -class DistributeTranspiler: - def _has_distributed_lookup_table(self): - # process lookup_table_op - # 1. check all lookup_table_op is distributed - # 2. check all lookup_table_op share the same table. - distributed_lookup_table_ops = [] - # support only one distributed_lookup_table now - self.table_name = None - for op in self.origin_program.global_block().ops: - if op.type == LOOKUP_TABLE_TYPE: - if op.attrs['is_distributed'] is True: - if self.table_name is None: - self.table_name = op.input("W")[0] - if self.table_name != op.input("W")[0]: - raise RuntimeError("all distributed lookup_table_ops" - " should have only one table") - distributed_lookup_table_ops.append(op) - else: - if self.table_name is not None: - assert op.input("W")[0] != self.table_name - - return len(distributed_lookup_table_ops) > 0 - - def _update_dist_lookup_table_vars(self, param_list, grad_list, - params_grads): - # TODO(wuyi): put find a way to put dist lookup table stuff all together. - # update self.table_param_grad and self.trainer_side_table_grad_list - program = self.origin_program - if self.has_distributed_lookup_table: - param_list = [ - param for param in param_list if param.name != self.table_name - ] - grad_list = [ - grad for grad in grad_list - if grad.name != grad_var_name(self.table_name) - ] - self.table_param_grad = [ - param_grad for param_grad in params_grads - if param_grad[0].name == self.table_name - ][0] - table_grad_var = self.table_param_grad[1] - if self.sync_mode: - self.trainer_side_table_grad_list = [ - program.global_block().create_var( - name="%s.trainer_%d.pserver_%d" % - (table_grad_var.name, self.trainer_id, index), - type=table_grad_var.type, - shape=table_grad_var.shape, - dtype=table_grad_var.dtype) - for index in range(len(self.pserver_endpoints)) - ] - else: - self.trainer_side_table_grad_list = [ - program.global_block().create_var( - name="%s.pserver_%d" % (table_grad_var.name, index), - type=table_grad_var.type, - shape=table_grad_var.shape, - dtype=table_grad_var.dtype) - for index in range(len(self.pserver_endpoints)) - ] - return param_list, grad_list - - def _init_splited_vars(self, slice_var_up): - # update these mappings for further transpile: - # 1. param_var_mapping: param var name -> [splited params vars] - # 2. grad_var_mapping: grad var name -> [splited grads vars] - # 3. grad_param_mapping: grad.blockx -> param.blockx - # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []} - - param_list = [] - grad_list = [] - param_grad_set = set() - for p, g in self.params_grads: - # skip parameter marked not trainable - if type(p) == Parameter and p.trainable == False: - continue - if p.name not in param_grad_set: - param_list.append(p) - param_grad_set.add(p.name) - if g.name not in param_grad_set: - grad_list.append(g) - param_grad_set.add(g.name) - - param_list, grad_list = self._update_dist_lookup_table_vars( - param_list, grad_list, self.params_grads) - - if slice_var_up: - # when we slice var up into blocks, we will slice the var according to - # pserver services' count. A pserver may have two or more listening ports. - grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints)) - param_blocks = slice_variable(param_list, - len(self.pserver_endpoints)) - else: - # when we do NOT slice var up into blocks, we will always slice params - # grads into one block. - grad_blocks = slice_variable(grad_list, 1) - param_blocks = slice_variable(param_list, 1) - assert (len(grad_blocks) == len(param_blocks)) - - # origin_varname -> [splited_var] - self.param_var_mapping = self._create_vars_from_blocklist( - self.origin_program, param_blocks) - self.grad_var_mapping = self._create_vars_from_blocklist( - self.origin_program, - grad_blocks, - add_trainer_suffix=self.trainer_num > 1) - self.grad_param_mapping = dict() - for g, p in zip(grad_blocks, param_blocks): - g_name, g_bid, _ = g.split(":") - p_name, p_bid, _ = p.split(":") - self.grad_param_mapping[self.grad_var_mapping[g_name][int(g_bid)]] = \ - self.param_var_mapping[p_name][int(p_bid)] - - # create mapping of endpoint -> split var to create pserver side program - self.param_grad_ep_mapping = dict() - [ - self.param_grad_ep_mapping.update({ - ep: { - "params": [], - "grads": [] - } - }) for ep in self.pserver_endpoints - ] +class DistributeTranspiler(object): + """ + **DistributeTranspiler** + + Convert the fluid program to distributed data-parallelism programs. + + The main_program will be transformed to use a remote parameter server + to do parameter optimization. And the optimization graph will be put + into a parameter server program. + + Examples: + .. code-block:: python + + # Define your model before these codes. + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "") + eplist = [] + for ip in pserver_ips.split(","): + eplist.append(':'.join([ip, port])) + pserver_endpoints = ",".join(eplist) + trainers = int(os.getenv("PADDLE_TRAINERS")) + current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port + trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) + role = os.getenv("PADDLE_TRAINING_ROLE") + + t = distribute_transpiler.DistributeTranspiler() + t.transpile( + trainer_id, pservers=pserver_endpoints, trainers=trainers) + if role == "PSERVER": + pserver_program = t.get_pserver_program(current_endpoint) + pserver_startup_program = t.get_startup_program(current_endpoint, + pserver_program) + elif role == "TRAINER": + trainer_program = t.get_trainer_program() + """ def transpile(self, trainer_id, @@ -250,20 +154,20 @@ class DistributeTranspiler: split_method=RoundRobin, sync_mode=True): """ - :param trainer_id: one unique id for each trainer in a job. - :type trainer_id: int - :param program: program to transpile, default is default_main_program - :type program: Program - :param pservers: parameter server endpoints like "m1:6174,m2:6174" - :type pservers: string - :param trainers: total number of workers/trainers in the job - :type trainers: int - :param split_method: A function to determin how to split variables - to different servers equally. - :type split_method: function - :type sync_mode: boolean default True - :param sync_mode: if sync_mode is set True, it means that dist transpiler - will transpile the program into sync_mode pserver and trainer program. + Run the transpiler. + + Args: + trainer_id (int): id for current trainer worker, if you have + n workers, the id may range from 0 ~ n-1 + program (Program|None): program to transpile, + default is fluid.default_main_program(). + pservers (str): comma separated ip:port string for the pserver + list. + trainers (int): number of trainers in the distributed job. + slice_var_up (bool): Do Tensor slice for pservers, default is True. + split_method (PSDispatcher): RoundRobin or HashName can be used + try to choose the best method to balance loads for pservers. + sync_mode (bool): Do sync training or not, default is True. """ assert (split_method.__bases__[0] == PSDispatcher) if program is None: @@ -390,20 +294,33 @@ class DistributeTranspiler: self._split_table_grad_and_add_send_vars(program, pserver_endpoints) def get_trainer_program(self): + """ + Get transpiled trainer side program. + + Returns: + Program: trainer side program. + """ # remove optimize ops and add a send op to main_program + # FIXME(typhoonzero): Also ops like clip_gradient, lrn_decay? delete_ops(self.origin_program.global_block(), self.optimize_ops) - # FIXME(typhoonzero): serialize once will fix error occurs when clone. self.origin_program.__str__() return self.origin_program def get_pserver_program(self, endpoint): """ - Get pserver side program using the endpoint. - TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers. - NOTE: assume blocks of the same variable is not distributed - on the same pserver, only change param/grad varnames for - trainers to fetch. + Get parameter server side program. + + Args: + endpoint (str): current parameter server endpoint. + + Returns: + Program: the program for current parameter server to run. """ + # TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers. + # NOTE: assume blocks of the same variable is not distributed + # on the same pserver, only change param/grad varnames for + # trainers to fetch. + # step1 pserver_program = Program() # step2: Create vars to receive vars at parameter servers. @@ -466,12 +383,13 @@ class DistributeTranspiler: if self._is_adam_connected_op(op): global_ops.append(op) - def __append_optimize_op__(op, block, grad_to_block_id, merged_var): + def __append_optimize_op__(op, block, grad_to_block_id, merged_var, + lr_ops): if self._is_optimizer_op(op): self._append_pserver_ops(block, op, endpoint, grad_to_block_id, self.origin_program, merged_var) - else: - self._append_pserver_non_opt_ops(block, op, endpoint) + elif op not in lr_ops: + self._append_pserver_non_opt_ops(block, op) def __op_have_grad_input__(op): for varname in op.input_arg_names: @@ -479,19 +397,50 @@ class DistributeTranspiler: return varname return "" + def __clone_lr_op_sub_block__(op, program, lr_block): + if not op.has_attr('sub_block'): + return + + origin_block_desc = op.attr('sub_block') + origin_block = self.origin_program.block(origin_block_desc.id) + assert isinstance(origin_block, Block) + # we put the new sub block to new block to follow the block + # hierarchy of the original blocks + new_sub_block = program.create_block(lr_block.idx) + + # clone vars + for var in origin_block.vars: + new_sub_block.clone_variable(var) + + # clone ops + for origin_op in origin_block.ops: + cloned_op = self._clone_lr_op(program, new_sub_block, origin_op) + # clone sub_block of op + __clone_lr_op_sub_block__(cloned_op, program, new_sub_block) + + # reset the block of op + op.set_attr('sub_block', new_sub_block) + # append lr decay ops to the child block if exists lr_ops = self._get_lr_ops() + # record optimize blocks and we can run them on pserver parallel + optimize_blocks = [] if len(lr_ops) > 0: lr_decay_block = pserver_program.create_block( pserver_program.num_blocks - 1) + optimize_blocks.append(lr_decay_block) for _, op in enumerate(lr_ops): - self._append_pserver_non_opt_ops(lr_decay_block, op, endpoint) + cloned_op = self._append_pserver_non_opt_ops(lr_decay_block, op) + # append sub blocks to pserver_program in lr_decay_op + __clone_lr_op_sub_block__(cloned_op, pserver_program, + lr_decay_block) # append op to the current block grad_to_block_id = [] pre_block_idx = pserver_program.num_blocks - 1 for idx, opt_op in enumerate(opt_op_on_pserver): per_opt_block = pserver_program.create_block(pre_block_idx) + optimize_blocks.append(per_opt_block) # append grad merging ops before clip and weight decay for _, op in enumerate(self.optimize_ops): # find the origin @GRAD var before clipping @@ -504,15 +453,18 @@ class DistributeTranspiler: # optimizer is connected to itself if ufind.is_connected(op, opt_op) and op not in global_ops: __append_optimize_op__(op, per_opt_block, grad_to_block_id, - merged_var) + merged_var, lr_ops) + # dedup grad to ids list + grad_to_block_id = list(set(grad_to_block_id)) # append global ops if global_ops: opt_state_block = pserver_program.create_block( pserver_program.num_blocks - 1) + optimize_blocks.append(opt_state_block) for glb_op in global_ops: __append_optimize_op__(glb_op, opt_state_block, - grad_to_block_id, None) + grad_to_block_id, None, lr_ops) # process distributed lookup_table prefetch_var_name_to_block_id = [] @@ -522,6 +474,8 @@ class DistributeTranspiler: pserver_index, pserver_program, pre_block_idx, grad_to_block_id) prefetch_var_name_to_block_id = self._create_prefetch_block( pserver_index, pserver_program, table_opt_block) + checkpoint_block_id = self._create_checkpoint_save_block( + pserver_program, table_opt_block.idx) # NOTE: if has_distributed_lookup_table is False, then prefetch_block will # not be executed, so it's safe to use optimize_block to hold the place @@ -531,15 +485,16 @@ class DistributeTranspiler: assert len(prefetch_var_name_to_block_id) == 0 attrs = { - "OptimizeBlock": pserver_program.block(1), + "optimize_blocks": optimize_blocks, "endpoint": endpoint, "Fanin": self.trainer_num, "sync_mode": self.sync_mode, - "grad_to_block_id": grad_to_block_id + "grad_to_block_id": grad_to_block_id, } if len(prefetch_var_name_to_block_id) > 0: attrs['prefetch_var_name_to_block_id'] \ = prefetch_var_name_to_block_id + attrs['checkpint_block_id'] = checkpoint_block_id # step5 append the listen_and_serv op pserver_program.global_block().append_op( @@ -556,6 +511,14 @@ class DistributeTranspiler: Get startup program for current parameter server. Modify operator input variables if there are variables that were split to several blocks. + + Args: + endpoint (str): current pserver endpoint. + pserver_program (Program): call get_pserver_program first and + pass the result here. + + Returns: + Program: parameter server side startup program. """ s_prog = Program() orig_s_prog = default_startup_program() @@ -577,7 +540,6 @@ class DistributeTranspiler: # 2. rename op outputs for op in orig_s_prog.global_block().ops: - new_inputs = dict() new_outputs = dict() # do not append startup op if var is not on this pserver op_on_pserver = False @@ -590,10 +552,10 @@ class DistributeTranspiler: op_on_pserver = True new_outputs[key] = pserver_vars[op.output(key)[0]] - # most startup program ops have no inputs - new_inputs = self._get_input_map_from_op(pserver_vars, op) - if op_on_pserver: + # most startup program ops have no inputs + new_inputs = self._get_input_map_from_op(pserver_vars, op) + if op.type in [ "gaussian_random", "fill_constant", "uniform_random" ]: @@ -607,6 +569,129 @@ class DistributeTranspiler: # ====================== private transpiler functions ===================== + def _has_distributed_lookup_table(self): + # process lookup_table_op + # 1. check all lookup_table_op is distributed + # 2. check all lookup_table_op share the same table. + distributed_lookup_table_ops = [] + # support only one distributed_lookup_table now + self.table_name = None + for op in self.origin_program.global_block().ops: + if op.type == LOOKUP_TABLE_TYPE: + if op.attrs['is_distributed'] is True: + if self.table_name is None: + self.table_name = op.input("W")[0] + if self.table_name != op.input("W")[0]: + raise RuntimeError("all distributed lookup_table_ops" + " should have only one table") + distributed_lookup_table_ops.append(op) + else: + if self.table_name is not None: + assert op.input("W")[0] != self.table_name + + return len(distributed_lookup_table_ops) > 0 + + def _update_dist_lookup_table_vars(self, param_list, grad_list, + params_grads): + # TODO(wuyi): put find a way to put dist lookup table stuff all together. + # update self.table_param_grad and self.trainer_side_table_grad_list + program = self.origin_program + if self.has_distributed_lookup_table: + param_list = [ + param for param in param_list if param.name != self.table_name + ] + grad_list = [ + grad for grad in grad_list + if grad.name != grad_var_name(self.table_name) + ] + self.table_param_grad = [ + param_grad for param_grad in params_grads + if param_grad[0].name == self.table_name + ][0] + table_grad_var = self.table_param_grad[1] + if self.sync_mode: + self.trainer_side_table_grad_list = [ + program.global_block().create_var( + name="%s.trainer_%d.pserver_%d" % + (table_grad_var.name, self.trainer_id, index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + else: + self.trainer_side_table_grad_list = [ + program.global_block().create_var( + name="%s.pserver_%d" % (table_grad_var.name, index), + type=table_grad_var.type, + shape=table_grad_var.shape, + dtype=table_grad_var.dtype) + for index in range(len(self.pserver_endpoints)) + ] + return param_list, grad_list + + def _init_splited_vars(self, slice_var_up): + # update these mappings for further transpile: + # 1. param_var_mapping: param var name -> [splited params vars] + # 2. grad_var_mapping: grad var name -> [splited grads vars] + # 3. grad_param_mapping: grad.blockx -> param.blockx + # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []} + + param_list = [] + grad_list = [] + param_grad_set = set() + for p, g in self.params_grads: + # skip parameter marked not trainable + if type(p) == Parameter and p.trainable == False: + continue + if p.name not in param_grad_set: + param_list.append(p) + param_grad_set.add(p.name) + if g.name not in param_grad_set: + grad_list.append(g) + param_grad_set.add(g.name) + + param_list, grad_list = self._update_dist_lookup_table_vars( + param_list, grad_list, self.params_grads) + + if slice_var_up: + # when we slice var up into blocks, we will slice the var according to + # pserver services' count. A pserver may have two or more listening ports. + grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints)) + param_blocks = slice_variable(param_list, + len(self.pserver_endpoints)) + else: + # when we do NOT slice var up into blocks, we will always slice params + # grads into one block. + grad_blocks = slice_variable(grad_list, 1) + param_blocks = slice_variable(param_list, 1) + assert (len(grad_blocks) == len(param_blocks)) + + # origin_varname -> [splited_var] + self.param_var_mapping = self._create_vars_from_blocklist( + self.origin_program, param_blocks) + self.grad_var_mapping = self._create_vars_from_blocklist( + self.origin_program, + grad_blocks, + add_trainer_suffix=self.trainer_num > 1) + self.grad_param_mapping = dict() + for g, p in zip(grad_blocks, param_blocks): + g_name, g_bid, _ = g.split(":") + p_name, p_bid, _ = p.split(":") + self.grad_param_mapping[self.grad_var_mapping[g_name][int(g_bid)]] = \ + self.param_var_mapping[p_name][int(p_bid)] + + # create mapping of endpoint -> split var to create pserver side program + self.param_grad_ep_mapping = dict() + [ + self.param_grad_ep_mapping.update({ + ep: { + "params": [], + "grads": [] + } + }) for ep in self.pserver_endpoints + ] + # transpiler function for dis lookup_table def _replace_lookup_table_op_with_prefetch(self, program, pserver_endpoints): @@ -798,7 +883,8 @@ class DistributeTranspiler: table_opt_block.append_op( type="sum", inputs={"X": pserver_side_table_grad_list}, - outputs={"Out": [grad_var]}) + outputs={"Out": [grad_var]}, + attrs={"use_mkldnn": False}) else: # in async_mode, for table gradient, it also need to be splited to each parameter server origin_grad_name = grad_var.name @@ -829,6 +915,27 @@ class DistributeTranspiler: return table_opt_block + def _create_checkpoint_save_block(self, pserver_program, pre_block_idx): + """ + create a new block to handle save checkpoint. + """ + import os + + pserver_program.global_block().create_var( + name="kLookupTablePath", + persistable=True, + type=core.VarDesc.VarType.RAW) + + checkpoint_save_block = pserver_program.create_block(pre_block_idx) + # this 'file_path' do not be used in save lookup table variable + checkpoint_save_block.append_op( + type='save', + inputs={'X': [self.table_name]}, + outputs={}, + attrs={'file_path': "none"}) + + return checkpoint_save_block.idx + def _create_vars_from_blocklist(self, program, block_list, @@ -855,8 +962,6 @@ class DistributeTranspiler: if not block_map.has_key(varname): block_map[varname] = [] block_map[varname].append((long(offset), long(size))) - # Do not remove this important debug message: - print("block map: %s" % block_map) for varname, splited in block_map.iteritems(): orig_var = program.global_block().var(varname) @@ -1030,7 +1135,8 @@ class DistributeTranspiler: optimize_block.append_op( type="sum", inputs={"X": vars2merge}, - outputs={"Out": merged_var}) + outputs={"Out": merged_var}, + attrs={"use_mkldnn": False}) # TODO(panyx0718): What if it's SELECTED_ROWS. if not merged_var.type == core.VarDesc.VarType.SELECTED_ROWS: optimize_block.append_op( @@ -1116,7 +1222,29 @@ class DistributeTranspiler: break return grad_block - def _append_pserver_non_opt_ops(self, optimize_block, opt_op, endpoint): + def _clone_lr_op(self, program, block, op): + inputs = self._get_input_map_from_op( + self.origin_program.global_block().vars, op) + for key, varlist in inputs.iteritems(): + if not isinstance(varlist, list): + varlist = [varlist] + for var in varlist: + if var not in program.global_block().vars: + block.clone_variable(var) + + outputs = self._get_output_map_from_op( + self.origin_program.global_block().vars, op) + for key, varlist in outputs.iteritems(): + if not isinstance(varlist, list): + varlist = [varlist] + for var in varlist: + if var not in program.global_block().vars: + block.clone_variable(var) + + return block.append_op( + type=op.type, inputs=inputs, outputs=outputs, attrs=op.attrs) + + def _append_pserver_non_opt_ops(self, optimize_block, opt_op): program = optimize_block.program # Append the ops for parameters that do not need to be optimized/updated inputs = self._get_input_map_from_op( @@ -1151,7 +1279,7 @@ class DistributeTranspiler: elif not program.global_block().vars.has_key(var.name): program.global_block().clone_variable(var) - optimize_block.append_op( + return optimize_block.append_op( type=opt_op.type, inputs=inputs, outputs=outputs, @@ -1195,16 +1323,6 @@ class DistributeTranspiler: ufind.union(op1, op2) return ufind - def _is_opt_role_op(self, op): - # NOTE: depend on oprole to find out whether this op is for - # optimize - op_maker = core.op_proto_and_checker_maker - optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize - if op_maker.kOpRoleAttrName() in op.attrs and \ - int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role): - return True - return False - def _is_optimizer_op(self, op): if "Param" in op.input_names and \ "LearningRate" in op.input_names: @@ -1283,6 +1401,16 @@ class DistributeTranspiler: break return lr_ops + def _is_opt_role_op(self, op): + # NOTE: depend on oprole to find out whether this op is for + # optimize + op_maker = core.op_proto_and_checker_maker + optimize_role = core.op_proto_and_checker_maker.OpRole.Optimize + if op_maker.kOpRoleAttrName() in op.attrs and \ + int(op.attrs[op_maker.kOpRoleAttrName()]) == int(optimize_role): + return True + return False + def _get_optimize_pass(self): """ Get optimizer operators, paramters and gradients from origin_program diff --git a/python/paddle/fluid/transpiler/inference_transpiler.py b/python/paddle/fluid/transpiler/inference_transpiler.py index 202aa76084..b8afeae5eb 100644 --- a/python/paddle/fluid/transpiler/inference_transpiler.py +++ b/python/paddle/fluid/transpiler/inference_transpiler.py @@ -12,23 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import numpy as np from .. import core from ..framework import Program from ..executor import global_scope -class InferenceTranspiler: +class InferenceTranspiler(object): + ''' + Convert the fluid program to optimized inference program. + + There are several optimizations: + + - fuse convolution and batch normalization + - fuse batch normalization and relu (MKLDNN only) + + Examples: + + .. code-block:: python + + # As InferenceTranspiler will modify the original program, + # please clone before use it. + inference_transpiler_program = program.clone() + t = fluid.InferenceTranspiler() + t.transpile(inference_transpiler_program, place) + ''' + def transpile(self, program, place, scope=None): ''' - Transpile the program. Support only fuse batch normalization now. + Run the transpiler. - :param program: program to transpile - :type program: Program - :param place: inference place - :type place: Place - :param scope: inference scope - :type scope: Scope or None + Args: + program (Program): program to transpile + place (Place): inference place + scope (Scope|None): inference Scope ''' if not isinstance(program, Program): raise TypeError("program should be as Program type") @@ -40,58 +58,110 @@ class InferenceTranspiler: if not isinstance(scope, core.Scope): raise TypeError("scope should be as Scope type or None") self.fuse_batch_norm(program, place, scope) + self.fuse_relu_mkldnn(program) + + def fuse_relu_mkldnn(self, program): + ''' + Transpile the program by fused relu activation for MKLDNN program. + + Relu activation following batch norm OP can be fused by adding + :math:`fuse_with_relu` attribute to batch norm OP. + + The result of fuse is: + + - before: + + - batch_norm->relu->any_other_op + + - after: + + - batch_norm->any_other_op + + :param program: program to transpile + :type program: Program + ''' + use_mkldnn = bool(os.getenv("FLAGS_use_mkldnn", False)) + if not use_mkldnn: + return + + self.block = program.block(0) + + i = 0 + while i < len(self.block.ops) - 1: + current_op = self.block.ops[i] + if current_op.type in ['batch_norm']: + next_op = self.block.ops[i + 1] + if next_op.type == 'relu': + # modify bnorm OP to include relu + current_op.set_attr("fuse_with_relu", True) + # remove relu OP + self.block.remove_op(i + 1) + i = i + 1 + + self._remove_unused_var() + # TODO(luotao): use clone() method to flush the program.desc in force, + # since some large program.desc will not be flushed immediately. + # And a better solution will be considered later. + program = program.clone() def fuse_batch_norm(self, program, place, scope): ''' Transpile the program by fused batch normalization. - - The batch normalization followed the convolution or fully connected layer - can be integrated with them. Doing so will give us a forward acceleration, + + The batch normalization followed the convolution or fully connected layer + can be integrated with them. Doing so will give us a forward acceleration, especially in environments like mobile or embedded. - - For input X: - - Conv process: X = input * W + bias - - Batch norm process: X' = (X - mean) / std - - Scale Process: Y = a * X' + b + + For input :math:`X`: + + - Conv process: :math:`X = input * W + bias` + - Batch norm process: :math:`X' = (X - mean) / std` + - Scale Process: :math:`Y = a * X' + b` After fuse into one operation: - Y = (input * W + bias - mean) / std * a + b - = input * a * W / std + ((bias - mean) / std * a + b) + .. math:: + + Y &= (input * W + bias - mean) / std * a + b \\\\ + &= input * a * W / std + ((bias - mean) / std * a + b) + + The operator transformation is: - The operator transformation is: - before: + - conv->batch_norm->any_other_op (bias == 0) - conv->elementwise_add->batch_norm->any_other_op (bias != 0) - - after: + + - after: + - conv->elementwise_add->any_other_op - + The transpile stages are: + 1. insert elementwise_add op when bias == 0. 2. fuse the batch_norm's parameters to conv and elementwise_add operators. 3. remove batch_norm ops which are not used in any other ops. 4. adjust the input of any_other_op to be the output of elementwise_add operator. 5. remove unused variables. - :param program: program to transpile - :type program: Program - :param place: inference place - :type place: Place - :param scope: inference scope - :type scope: Scope + Args: + program (Program): program to transpile + place (Place): inference place + scope (Scope): inference Scope + ''' self.scope = scope self.place = place self.block = program.block(0) - self.input_map = {} # store the input names should be adjusted + self.input_map = {} # store the input names should be adjusted i = 0 - while i < len(self.block.ops): + while i < len(self.block.ops) - 2: current_op = self.block.ops[i] # TODO(luotao1): consider only conv2d now. fc would be delt later. if current_op.type in ['conv2d']: - # TODO(luotao1): consider single chain network now. - # For branch network, we counldn't use block.ops[i + 1] as + # TODO(luotao1): consider single chain network now. + # For branch network, we counldn't use block.ops[i + 1] as # the judgment condition. next_op = self.block.ops[i + 1] # conv2d without bias @@ -116,17 +186,17 @@ class InferenceTranspiler: self._adjust_input() self._remove_unused_var() - # TODO(luotao): use clone() method to flush the program.desc in force, - # since some large program.desc will not be flushed immediately. + # TODO(luotao): use clone() method to flush the program.desc in force, + # since some large program.desc will not be flushed immediately. # And a better solution will be considered later. program = program.clone() # ====================== private transpiler functions ===================== def _insert_bias_op(self, index, current_op, bn_op): ''' - Construct elementwise_add operator for adding bias + Construct elementwise_add operator for adding bias and insert it into program. - + :param index: insert location of bias_op :type index: Int :param current_op: current operator (conv or fc) @@ -154,14 +224,14 @@ class InferenceTranspiler: def _fuse_param(self, current_op, bn_op, bias_op, with_bias): ''' fuse the batch_norm_op' parameters to current_op (conv or fc) - + :param current_op: current operator (conv or fc) :type current_op: Operator :param bn_op: batch norm operator :type bn_op: Operator :param bias_op: elementwise_add operator for adding bias :type bias_op: Operator - :param with_bias: If current operator has bias, with_bias = 1; otherwise 0. + :param with_bias: If current operator has bias, with_bias = 1; otherwise 0. :type with_bias: Int ''' diff --git a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py index 9ff0ae6fca..999ef43ca0 100644 --- a/python/paddle/fluid/transpiler/memory_optimization_transpiler.py +++ b/python/paddle/fluid/transpiler/memory_optimization_transpiler.py @@ -157,9 +157,11 @@ class ControlFlowGraph(object): if op.type() == "fill_constant" and op.attr("force_cpu") == True: self._skip_opt.update(op.output_arg_names()) - def release_memory(self): + def release_memory(self, skip_opt_set=None): self._dataflow_analyze() self._update_skip_opt_set() + if skip_opt_set: + self._skip_opt.update(skip_opt_set) fwd_id = 0 bwd_id = 0 for i in range(self.op_size): @@ -183,7 +185,7 @@ class ControlFlowGraph(object): else: bwd_id += 1 - def memory_optimize(self, level=0): + def memory_optimize(self, skip_opt_set=None, level=0): def compare_shape(x_shape, cache_shape, opt_level): if opt_level == 0: return x_shape == cache_shape @@ -200,6 +202,9 @@ class ControlFlowGraph(object): self._dataflow_analyze() self._update_skip_opt_set() + # update skip set to meet users' demand + if skip_opt_set: + self._skip_opt.update(skip_opt_set) self.pool = [] for i in range(self.op_size): op = self._ops[i] @@ -358,7 +363,7 @@ def _get_cfgs(input_program): return cfgs -def memory_optimize(input_program, print_log=False, level=0): +def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0): """Optimize memory by reusing var memory. Note: it doesn't not support subblock nested in subblock. @@ -374,10 +379,20 @@ def memory_optimize(input_program, print_log=False, level=0): PRINT_LOG = print_log cfgs = _get_cfgs(input_program) for cfg in cfgs: - cfg.memory_optimize(level) + cfg.memory_optimize(skip_opt_set=skip_opt_set, level=level) -def release_memory(input_program): +def release_memory(input_program, skip_opt_set=None): + """ + Modify the input program and insert :code:`delete_op` to early drop not used + variables. The modification will be performed inplace. + + Notes: This is an experimental API and could be removed in next few + releases. Users should not use this API. + + Args: + input_program(Program): The program will be inserted :code:`delete_op`. + """ cfgs = _get_cfgs(input_program) for cfg in cfgs: - cfg.release_memory() + cfg.release_memory(skip_opt_set=skip_opt_set) diff --git a/python/paddle/fluid/transpiler/ps_dispatcher.py b/python/paddle/fluid/transpiler/ps_dispatcher.py index d6a6867752..dcffadd531 100644 --- a/python/paddle/fluid/transpiler/ps_dispatcher.py +++ b/python/paddle/fluid/transpiler/ps_dispatcher.py @@ -33,15 +33,21 @@ class PSDispatcher(object): def dispatch(self, varlist): """ - :param varlist: a list of Variables - :return: a map of pserver endpoint -> varname + Args: + varlist(list): a list of Variables + Returns: + a map of pserver endpoint -> varname """ AssertionError("Interface has not been implemented.") class HashName(PSDispatcher): """ - Hash variable names to several endpoints + Hash variable names to several endpoints using python + "hash()" function. + + Args: + pserver_endpoints (list): list of endpoint(ip:port). """ def __init__(self, pserver_endpoints): @@ -61,7 +67,11 @@ class HashName(PSDispatcher): class RoundRobin(PSDispatcher): """ - Distribute variables to serveral endpoints. + Distribute variables to serveral endpoints using + RondRobin method. + + Args: + pserver_endpoints (list): list of endpoint(ip:port). """ def __init__(self, pserver_endpoints): diff --git a/python/paddle/fluid/unique_name.py b/python/paddle/fluid/unique_name.py index 33c53113ae..776619cd36 100644 --- a/python/paddle/fluid/unique_name.py +++ b/python/paddle/fluid/unique_name.py @@ -16,7 +16,7 @@ import collections import contextlib import sys -__all__ = ['generate', 'switch', 'guard', 'UniqueNameGenerator'] +__all__ = ['generate', 'switch', 'guard'] class UniqueNameGenerator(object): diff --git a/python/paddle/libs/__init__.py b/python/paddle/libs/__init__.py new file mode 100644 index 0000000000..34d4f4d07e --- /dev/null +++ b/python/paddle/libs/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# used for setup.py.in to store the thirdparty shared libraries diff --git a/python/paddle/reader/decorator.py b/python/paddle/reader/decorator.py index 44a6e34463..1f83cabb84 100644 --- a/python/paddle/reader/decorator.py +++ b/python/paddle/reader/decorator.py @@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"): class PipeReader: """ - PipeReader read data by stream from a command, take it's + PipeReader read data by stream from a command, take it's stdout into a pipe buffer and redirect it to the parser to parse, then yield data as your desired format. @@ -352,7 +352,7 @@ class PipeReader: An example: .. code-block:: python - + def example_reader(): for f in myfiles: pr = PipeReader("cat %s"%f) diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 460eb3b349..5b90facd49 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -67,7 +67,7 @@ extension_module_name=[MODULE_NAME], then config_parser will call MODULE_NAME.get_config_funcs(g_config) MODULE_NAME.get_config_funcs() should return a dictionary of name to functions, those functions will be available in the config file. -See trainer/tests/config_parser_test.py for example +See legacy/trainer/tests/config_parser_test.py for example To use this from paddle_trainer, paddle_trainer should be called with --config_args=extension_module_name=[MODULE_NAME] diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index e6a03759ef..d9787ef42a 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -4182,9 +4182,9 @@ def recurrent_group(step, input, reverse=False, name=None, targetInlink=None): You can see following configs for further usages: - - time steps: lstmemory_group, paddle/gserver/tests/sequence_layer_group.conf, \ + - time steps: lstmemory_group, paddle/legacy/gserver/tests/sequence_layer_group.conf, \ demo/seqToseq/seqToseq_net.py - - sequence steps: paddle/gserver/tests/sequence_nest_layer_group.conf + - sequence steps: paddle/legacy/gserver/tests/sequence_nest_layer_group.conf :param step: A step function which takes the input of recurrent_group as its own input and returns values as recurrent_group's output every time step. diff --git a/python/paddle/v2/dataset/cifar.py b/python/paddle/v2/dataset/cifar.py index 0a2a1ced11..662655c836 100644 --- a/python/paddle/v2/dataset/cifar.py +++ b/python/paddle/v2/dataset/cifar.py @@ -43,7 +43,7 @@ CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz' CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85' -def reader_creator(filename, sub_name): +def reader_creator(filename, sub_name, cycle=False): def read_batch(batch): data = batch['data'] labels = batch.get('labels', batch.get('fine_labels', None)) @@ -56,10 +56,13 @@ def reader_creator(filename, sub_name): names = (each_item.name for each_item in f if sub_name in each_item.name) - for name in names: - batch = cPickle.load(f.extractfile(name)) - for item in read_batch(batch): - yield item + while True: + for name in names: + batch = cPickle.load(f.extractfile(name)) + for item in read_batch(batch): + yield item + if not cycle: + break return reader @@ -94,34 +97,40 @@ def test100(): 'test') -def train10(): +def train10(cycle=False): """ CIFAR-10 training set creator. It returns a reader creator, each sample in the reader is image pixels in [0, 1] and label in [0, 9]. + :param cycle: whether to cycle through the dataset + :type cycle: bool :return: Training reader creator :rtype: callable """ return reader_creator( paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch') + 'data_batch', + cycle=cycle) -def test10(): +def test10(cycle=False): """ CIFAR-10 test set creator. It returns a reader creator, each sample in the reader is image pixels in [0, 1] and label in [0, 9]. + :param cycle: whether to cycle through the dataset + :type cycle: bool :return: Test reader creator. :rtype: callable """ return reader_creator( paddle.v2.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch') + 'test_batch', + cycle=cycle) def fetch(): diff --git a/python/paddle/v2/dataset/flowers.py b/python/paddle/v2/dataset/flowers.py index 357a4e9b00..db12076d54 100644 --- a/python/paddle/v2/dataset/flowers.py +++ b/python/paddle/v2/dataset/flowers.py @@ -76,7 +76,8 @@ def reader_creator(data_file, dataset_name, mapper, buffered_size=1024, - use_xmap=True): + use_xmap=True, + cycle=False): ''' 1. read images from tar file and merge images into batch files in 102flowers.tgz_batch/ @@ -96,6 +97,8 @@ def reader_creator(data_file, :type mapper: callable :param buffered_size: the size of buffer used to process images :type buffered_size: int + :param cycle: whether to cycle through the dataset + :type cycle: bool :return: data reader :rtype: callable ''' @@ -108,15 +111,18 @@ def reader_creator(data_file, file_list = batch_images_from_tar(data_file, dataset_name, img2label) def reader(): - for file in open(file_list): - file = file.strip() - batch = None - with open(file, 'r') as f: - batch = cPickle.load(f) - data = batch['data'] - labels = batch['label'] - for sample, label in itertools.izip(data, batch['label']): - yield sample, int(label) - 1 + while True: + for file in open(file_list): + file = file.strip() + batch = None + with open(file, 'r') as f: + batch = cPickle.load(f) + data = batch['data'] + labels = batch['label'] + for sample, label in itertools.izip(data, batch['label']): + yield sample, int(label) - 1 + if not cycle: + break if use_xmap: cpu_num = int(os.environ.get('CPU_NUM', cpu_count())) @@ -125,7 +131,7 @@ def reader_creator(data_file, return map_readers(mapper, reader) -def train(mapper=train_mapper, buffered_size=1024, use_xmap=True): +def train(mapper=train_mapper, buffered_size=1024, use_xmap=True, cycle=False): ''' Create flowers training set reader. It returns a reader, each sample in the reader is @@ -138,17 +144,23 @@ def train(mapper=train_mapper, buffered_size=1024, use_xmap=True): :type mapper: callable :param buffered_size: the size of buffer used to process images :type buffered_size: int + :param cycle: whether to cycle through the dataset + :type cycle: bool :return: train data reader :rtype: callable ''' return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), TRAIN_FLAG, mapper, - buffered_size, use_xmap) + download(SETID_URL, 'flowers', SETID_MD5), + TRAIN_FLAG, + mapper, + buffered_size, + use_xmap, + cycle=cycle) -def test(mapper=test_mapper, buffered_size=1024, use_xmap=True): +def test(mapper=test_mapper, buffered_size=1024, use_xmap=True, cycle=False): ''' Create flowers test set reader. It returns a reader, each sample in the reader is @@ -161,14 +173,20 @@ def test(mapper=test_mapper, buffered_size=1024, use_xmap=True): :type mapper: callable :param buffered_size: the size of buffer used to process images :type buffered_size: int + :param cycle: whether to cycle through the dataset + :type cycle: bool :return: test data reader :rtype: callable ''' return reader_creator( download(DATA_URL, 'flowers', DATA_MD5), download(LABEL_URL, 'flowers', LABEL_MD5), - download(SETID_URL, 'flowers', SETID_MD5), TEST_FLAG, mapper, - buffered_size, use_xmap) + download(SETID_URL, 'flowers', SETID_MD5), + TEST_FLAG, + mapper, + buffered_size, + use_xmap, + cycle=cycle) def valid(mapper=test_mapper, buffered_size=1024, use_xmap=True): diff --git a/python/paddle/v2/dataset/mnist.py b/python/paddle/v2/dataset/mnist.py index 9f675bed89..2b959c48e4 100644 --- a/python/paddle/v2/dataset/mnist.py +++ b/python/paddle/v2/dataset/mnist.py @@ -112,7 +112,7 @@ def fetch(): paddle.v2.dataset.common.download(TRAIN_IMAGE_URL, 'mnist', TRAIN_IMAGE_MD5) paddle.v2.dataset.common.download(TRAIN_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) paddle.v2.dataset.common.download(TEST_IMAGE_URL, 'mnist', TEST_IMAGE_MD5) - paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TRAIN_LABEL_MD5) + paddle.v2.dataset.common.download(TEST_LABEL_URL, 'mnist', TEST_LABEL_MD5) def convert(path): diff --git a/python/paddle/v2/inference.py b/python/paddle/v2/inference.py index 14b64742fd..28ee042282 100644 --- a/python/paddle/v2/inference.py +++ b/python/paddle/v2/inference.py @@ -63,7 +63,7 @@ class Inference(object): assert isinstance(val, api.Vector) val.copyFromNumpyArray(parameters.get(name).flatten()) # the setValueUpdated function is called in randomize, zeroMem, - # load function in paddle/parameter/Parameter.cpp. But in the + # load function in paddle/legacy/parameter/Parameter.cpp. But in the # inference mode, the setValueUpdated is never called, it will # cause the parameter will not be dispatched # in MultiGradientMachine for multi-GPU. So setValueUpdated is diff --git a/python/setup.py.in b/python/setup.py.in index 8257f1d5e2..a0cb39070b 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -1,14 +1,13 @@ from setuptools import setup, Distribution, Extension import subprocess +import os +import re +import shutil class BinaryDistribution(Distribution): def has_ext_modules(foo): return True -MAJOR = 0 -MINOR = 11 -PATCH = 0 RC = 0 -ISTAGED = False @@ -20,14 +19,47 @@ def git_commit(): git_commit = 'Unknown' return git_commit +def _get_version_detail(idx): + assert idx < 3, "vesion info consists of %(major)d.%(minor)d.%(patch)d, \ + so detail index must less than 3" + + if re.match('@TAG_VERSION_REGEX@', '@PADDLE_VERSION@'): + version_details = '@PADDLE_VERSION@'.split('.') + + if len(version_details) == 3: + return version_details[idx] + + return 0 + +def get_major(): + return int(_get_version_detail(0)) + +def get_minor(): + return int(_get_version_detail(1)) + +def get_patch(): + return str(_get_version_detail(2)) + +def is_taged(): + try: + cmd = ['git', 'describe', '--exact-match', '--tags'] + git_tag = subprocess.Popen(cmd, stdout = subprocess.PIPE).communicate()[0].strip() + except: + return False + + if git_tag.replace('v', '') == '@PADDLE_VERSION@': + return True + else: + return False + def write_version_py(filename='paddle/version.py'): cnt = ''' # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY # -full_version = '%(major)d.%(minor)d.%(patch)d' +full_version = '%(major)d.%(minor)d.%(patch)s' major = '%(major)d' minor = '%(minor)d' -patch = '%(patch)d' +patch = '%(patch)s' rc = '%(rc)d' istaged = %(istaged)s commit = '%(commit)s' @@ -49,19 +81,20 @@ def mkl(): commit = git_commit() with open(filename, 'w') as f: f.write(cnt % { - 'major': MAJOR, - 'minor': MINOR, - 'patch': PATCH, + 'major': get_major(), + 'minor': get_minor(), + 'patch': get_patch(), 'rc': RC, 'version': '${PADDLE_VERSION}', 'commit': commit, - 'istaged': ISTAGED, + 'istaged': is_taged(), 'with_mkl': '@WITH_MKL@'}) write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') packages=['paddle', + 'paddle.libs', 'paddle.utils', 'paddle.dataset', 'paddle.reader', @@ -93,9 +126,9 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']: paddle_bins = '' if '${WITH_FLUID_ONLY}'== 'OFF': paddle_bin_dir = 'opt/paddle/bin' - paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', - '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model', - '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main', + paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/legacy/trainer/paddle_trainer', + '${PADDLE_BINARY_DIR}/paddle/legacy/trainer/paddle_merge_model', + '${PADDLE_BINARY_DIR}/paddle/legacy/pserver/paddle_pserver_main', '${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] package_data={'paddle.fluid': ['core.so']} @@ -113,12 +146,35 @@ package_dir={ } if '${WITH_FLUID_ONLY}'== 'OFF': package_dir['py_paddle']='${PADDLE_BINARY_DIR}/python/py_paddle' - -paddle_rt_lib_dir = 'lib' -paddle_rt_libs = ['${WARPCTC_LIBRARIES}'] -if '${MKL_SHARED_LIBS}'!= '': - paddle_rt_libs += '${MKL_SHARED_LIBS}'.split(';') +# put all thirdparty libraries in paddle.libs +package_data['paddle.libs']=['libwarpctc.so'] +libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs' +shutil.copy('${WARPCTC_LIBRARIES}', libs_path) +if '${WITH_MKL}' == 'ON': + shutil.copy('${MKLML_LIB}', libs_path) + shutil.copy('${MKLML_IOMP_LIB}', libs_path) + package_data['paddle.libs']+=['libmklml_intel.so','libiomp5.so'] +if '${WITH_MKLDNN}' == 'ON': + # change rpath of libmkldnn.so.0, add $ORIGIN/ to it. + # The reason is that all thirdparty libraries in the same directory, + # thus, libmkldnn.so.0 will find libmklml_intel.so and libiomp5.so. + command = "patchelf --set-rpath '$ORIGIN/' ${MKLDNN_SHARED_LIB}" + if os.system(command) != 0: + raise Exception("patchelf --set-rpath for libmkldnn.so.0 fails") + package_data['paddle.libs']+=['libmkldnn.so.0'] + shutil.copy('${MKLDNN_SHARED_LIB}', libs_path) +# remove unused paddle/libs/__init__.py +os.remove(libs_path+'/__init__.py') +package_dir['paddle.libs']=libs_path + +# change rpath of core.so, add $ORIGIN/../libs/ to it. +# The reason is that libwarpctc.so, libiomp5.so etc are in paddle.libs, and +# core.so is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries. +# This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213 +command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so" +if os.system(command) != 0: + raise Exception("patchelf --set-rpath for core.so fails") setup(name='${PACKAGE_NAME}', version='${PADDLE_VERSION}', @@ -128,6 +184,5 @@ setup(name='${PACKAGE_NAME}', ext_modules=[Extension('_foo', ['stub.cc'])], package_data=package_data, package_dir=package_dir, - scripts=paddle_bins, - data_files=[(paddle_rt_lib_dir, paddle_rt_libs)] + scripts=paddle_bins ) diff --git a/tools/check_ctest_hung.py b/tools/check_ctest_hung.py new file mode 100644 index 0000000000..7de76c381b --- /dev/null +++ b/tools/check_ctest_hung.py @@ -0,0 +1,53 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import re + + +def escape(input): + o = input.replace("\n", "") + o = o.replace("\r", "") + return o + + +def main(): + usage = """Usage: +1. Download the Paddle_PR_CI_*.log from TeamCity +2. run: python check_ctest_hung.py Paddle_PR_CI_*.log +3. If there is hung ctest, the result likes: +Diff: set(['test_parallel_executor_crf']) + """ + if len(sys.argv) < 2: + print(usage) + exit(0) + + logfile = sys.argv[1] + started = set() + passed = set() + with open(logfile, "r") as fn: + for l in fn.readlines(): + if l.find("Test ") != -1 and \ + l.find("Passed") != -1: + m = re.search("Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l)) + passed.add(m.group(1)) + if l.find("Start ") != -1: + start_parts = escape(l).split(" ") + m = re.search("Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l)) + started.add(m.group(1)) + print "Diff: ", started - passed + + +if __name__ == "__main__": + main() diff --git a/.clang_format.hook b/tools/codestyle/clang_format.hook similarity index 100% rename from .clang_format.hook rename to tools/codestyle/clang_format.hook diff --git a/.copyright.hook b/tools/codestyle/copyright.hook similarity index 100% rename from .copyright.hook rename to tools/codestyle/copyright.hook diff --git a/tools/codestyle/cpplint_pre_commit.hook b/tools/codestyle/cpplint_pre_commit.hook index b194af76dc..2c65222c8a 100755 --- a/tools/codestyle/cpplint_pre_commit.hook +++ b/tools/codestyle/cpplint_pre_commit.hook @@ -4,10 +4,10 @@ TOTAL_ERRORS=0 # The trick to remove deleted files: https://stackoverflow.com/a/2413151 for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do - if [[ $file =~ ^(paddle/api/.*|paddle/capi/.*|paddle/contrib/.*|paddle/cuda/.*|paddle/function/.*|paddle/gserver/.*|paddle/math/.*|paddle/optimizer/.*|paddle/parameter/.*|paddle/pserver/.*|paddle/trainer/.*|paddle/utils/.*) ]]; then + if [[ $file =~ ^(paddle/legacy/api/.*|paddle/legacy/capi/.*|paddle/contrib/.*|paddle/legacy/cuda/.*|paddle/legacy/function/.*|paddle/legacy/gserver/.*|paddle/legacy/math/.*|paddle/legacy/optimizer/.*|paddle/legacy/parameter/.*|paddle/legacy/pserver/.*|paddle/legacy/trainer/.*|paddle/legacy/utils/.*|paddle/testing/TestUtil.*) ]]; then continue; else - cpplint $file; + cpplint --filter=-readability/fn_size $file; TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); fi done diff --git a/tools/codestyle/docstring_checker.py b/tools/codestyle/docstring_checker.py index 54a6904626..8d4b24a0cf 100644 --- a/tools/codestyle/docstring_checker.py +++ b/tools/codestyle/docstring_checker.py @@ -291,6 +291,8 @@ class DocstringChecker(BaseChecker): True if successful otherwise False. """ + if node.name.startswith("__") or node.name.startswith("_"): + return True find = False for t in node.body: if not isinstance(t, astroid.Return): @@ -316,6 +318,8 @@ class DocstringChecker(BaseChecker): Returns: True if successful otherwise False. """ + if node.name.startswith("__") or node.name.startswith("_"): + return True args = [] for arg in node.args.get_children(): if (not isinstance(arg, astroid.AssignName)) \ diff --git a/tools/diff_api.py b/tools/diff_api.py new file mode 100644 index 0000000000..cf9f2c72cb --- /dev/null +++ b/tools/diff_api.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +from __future__ import print_function +import difflib +import sys + +with open(sys.argv[1], 'r') as f: + origin = f.read() + origin = origin.splitlines() + +with open(sys.argv[2], 'r') as f: + new = f.read() + new = new.splitlines() + +differ = difflib.Differ() +result = differ.compare(origin, new) + +error = False +print('API Difference is: ') +for each_diff in result: + if each_diff[0] in ['-', '?']: # delete or change API is not allowed + error = True + elif each_diff[0] == '+': + # only new layers is allowed. + if not each_diff.startswith('+ paddle.fluid.layers.'): + error = True + + if each_diff[0] != ' ': + print(each_diff) + +if error: + sys.exit(1) diff --git a/tools/print_signatures.py b/tools/print_signatures.py new file mode 100644 index 0000000000..5e7ffd44c7 --- /dev/null +++ b/tools/print_signatures.py @@ -0,0 +1,67 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Print all signature of a python module in alphabet order. + +Usage: + ./print_signature "paddle.fluid" > signature.txt +""" +import importlib +import inspect +import collections +import sys +import pydoc + +member_dict = collections.OrderedDict() + + +def visit_member(parent_name, member): + cur_name = ".".join([parent_name, member.__name__]) + if inspect.isclass(member): + for name, value in inspect.getmembers(member): + if hasattr(value, '__name__') and (not name.startswith("_") or + name == "__init__"): + visit_member(cur_name, value) + elif callable(member): + try: + member_dict[cur_name] = inspect.getargspec(member) + except TypeError: # special for PyBind method + member_dict[cur_name] = " ".join([ + line.strip() for line in pydoc.render_doc(member).split('\n') + if "->" in line + ]) + + else: + raise RuntimeError("Unsupported generate signature of member, type {0}". + format(str(type(member)))) + + +def visit_all_module(mod): + for member_name in ( + name + for name in (mod.__all__ if hasattr(mod, "__all__") else dir(mod)) + if not name.startswith("_")): + instance = getattr(mod, member_name, None) + if instance is None: + continue + if inspect.ismodule(instance): + visit_all_module(instance) + else: + visit_member(mod.__name__, instance) + + +visit_all_module(importlib.import_module(sys.argv[1])) + +for name in member_dict: + print name, member_dict[name]