Require Python protobuf >= 3.0.0 and validate the PReLU parameter size.

What this patch changes:
  * cmake/python_module.cmake: find_python_module() additionally asks the
    interpreter for <module>.__version__ and exports PY_<MODULE>_VERSION
    (plus PY_<MODULE>_FOUND) to the caller's scope.  The cache entry is
    written with FORCE, and removed when the query fails, so a re-configure
    after `pip install --upgrade` sees the new version instead of a stale one.
  * cmake/external/python.cmake: configuration stops with FATAL_ERROR when the
    detected protobuf is older than 3.0.0.  The version is expanded inside
    quotes so that an undetected (empty) version reaches the same diagnostic
    instead of producing an IF() argument error.
  * paddle/math/Matrix.cpp: the six paramRelu* kernels (GPU and CPU) CHECK
    that the parameter size is non-zero and divides the input width before
    computing partial_sum; the non-zero test short-circuits the modulo.
  * paddle/math/tests: add width/height 40 and skip shape combinations whose
    width is not a multiple of the parameter size.
  * paddle/setup.py.in: raise the protobuf requirement to >=3.0.0.
  * doc/howto/usage/k8s/k8s_distributed_cn.md: two clarifying notes.

NOTE(review): leading whitespace of context lines was reconstructed by
convention; apply with `git apply --ignore-whitespace` (or `patch -l`) if the
strict match fails.

diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake
index 55787f75f8..29247d5c3d 100644
--- a/cmake/external/python.cmake
+++ b/cmake/external/python.cmake
@@ -26,6 +26,11 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
     find_python_module(wheel REQUIRED)
     find_python_module(google.protobuf REQUIRED)
     FIND_PACKAGE(NumPy REQUIRED)
+    # Quoted on purpose: an empty (undetected) version must still form a valid IF().
+    IF("${PY_GOOGLE.PROTOBUF_VERSION}" VERSION_LESS "3.0.0")
+        MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
+        "please use pip to upgrade protobuf.")
+    ENDIF()
 ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
     ##################################### PYTHON ########################################
     SET(PYTHON_SOURCES_DIR ${THIRD_PARTY_PATH}/python)
diff --git a/cmake/python_module.cmake b/cmake/python_module.cmake
index 2eb3441428..1412b7f7f2 100644
--- a/cmake/python_module.cmake
+++ b/cmake/python_module.cmake
@@ -26,5 +26,24 @@ function(find_python_module module)
     if(NOT PY_${module_upper}_FOUND AND ${module}_FIND_REQUIRED)
         message(FATAL_ERROR "python module ${module} is not found")
     endif()
+
+    # Ask the interpreter for the module's __version__; the result status is
+    # non-zero when the import or the attribute lookup fails.
+    execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
+        "import sys, ${module}; sys.stdout.write(${module}.__version__)"
+        OUTPUT_VARIABLE _${module}_version
+        RESULT_VARIABLE _${module}_status
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if(NOT _${module}_status)
+        # FORCE refreshes the entry on every configure; without it a version
+        # cached by an earlier run would survive a later pip upgrade.
+        set(PY_${module_upper}_VERSION ${_${module}_version} CACHE STRING
+            "Version of Python module ${module}" FORCE)
+    else()
+        unset(PY_${module_upper}_VERSION CACHE)
+    endif()
+
     set(PY_${module_upper}_FOUND ${PY_${module_upper}_FOUND} PARENT_SCOPE)
+    set(PY_${module_upper}_VERSION ${PY_${module_upper}_VERSION} PARENT_SCOPE)
 endfunction(find_python_module)
diff --git a/doc/howto/usage/k8s/k8s_distributed_cn.md b/doc/howto/usage/k8s/k8s_distributed_cn.md
index b63b8437a0..2063b98ca8 100644
--- a/doc/howto/usage/k8s/k8s_distributed_cn.md
+++ b/doc/howto/usage/k8s/k8s_distributed_cn.md
@@ -159,6 +159,8 @@ docker build -t your_repo/paddle:mypaddle .
 docker push your_repo/paddle:mypaddle
 ```
 
+注意上述命令中`your_repo`表示读者所使用的Docker镜像仓库地址,读者需要替换成自己使用的仓库地址。下文使用`your_repo/paddle:mypaddle`这个地址来表示此步骤所构建出的镜像。
+
 ### 上传训练文件
 
 本文使用PaddlePaddle官方的[recommendation demo](http://www.paddlepaddle.org/doc/demo/index.html#recommendation)作为这次训练的内容,我们将训练文件与数据放在一个job name命名的目录中,上传到MFS共享存储。完成后MFS上的文件内容大致如下:
@@ -244,6 +246,8 @@ spec:
 
 `CONF_PADDLE_GRADIENT_NUM`表示训练节点数量,即`--num_gradient_servers`参数
 
+这些参数的具体描述,读者可以查看[这里](http://www.paddlepaddle.org/doc/ui/cmd_argument/detail_introduction.html#parameter-server-and-distributed-communication)。
+
 编写完YAML文件后,可以使用Kubernetes的命令行工具创建job。
 
 ```bash
diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp
index 90813a8996..3ae237bc7d 100644
--- a/paddle/math/Matrix.cpp
+++ b/paddle/math/Matrix.cpp
@@ -1311,7 +1311,9 @@ void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
   real* w = W.getData();
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
+  size_t paraSize = W.getHeight() * W.getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   real* output = getData();
   hl_param_relu_forward(output, input, w, numElements, numSamples, partial_sum);
 }
@@ -1324,7 +1326,9 @@ void GpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
   real* wgrad = data_;
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (this->getHeight() * this->getWidth());
+  size_t paraSize = this->getHeight() * this->getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   hl_param_relu_backward_w(
       wgrad, ograd, input, numElements, numSamples, partial_sum);
 }
@@ -1336,7 +1340,9 @@ void GpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
   real* w = W.getData();
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
+  size_t paraSize = W.getHeight() * W.getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   hl_param_relu_backward_diff(
       ograd, input, w, diff, numElements, numSamples, partial_sum);
 }
@@ -3764,7 +3770,9 @@ void CpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
   real* w = W.getData();
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
+  size_t paraSize = W.getHeight() * W.getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   for (size_t n = 0, k = 0; n < numSamples; ++n) {
     for (size_t i = 0; i < numElements; ++i, ++k) {
       data_[k] = input[k] > 0 ? input[k] : input[k] * w[i / partial_sum];
@@ -3778,7 +3786,9 @@ void CpuMatrix::paramReluBackwardW(Matrix& oGrad, Matrix& data) {
   real* wgrad = data_;
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (this->getHeight() * this->getWidth());
+  size_t paraSize = this->getHeight() * this->getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   for (size_t n = 0, k = 0; n < numSamples; ++n) {
     for (size_t i = 0; i < numElements; ++i, ++k) {
       wgrad[i / partial_sum] += ograd[k] * (input[k] > 0 ? 0 : input[k]);
@@ -3793,7 +3803,9 @@ void CpuMatrix::paramReluBackwardDiff(Matrix& oGrad, Matrix& data, Matrix& W) {
   real* w = W.getData();
   size_t numElements = data.getWidth();
   size_t numSamples = data.getHeight();
-  size_t partial_sum = numElements / (W.getHeight() * W.getWidth());
+  size_t paraSize = W.getHeight() * W.getWidth();
+  CHECK(paraSize && !(numElements % paraSize));  // see ParameterReluLayer::init
+  size_t partial_sum = numElements / paraSize;
   for (size_t n = 0, k = 0; n < numSamples; ++n) {
     for (size_t i = 0; i < numElements; ++i, ++k) {
       diff[k] += ograd[k] * (input[k] > 0 ? 1 : w[i / partial_sum]);
diff --git a/paddle/math/tests/test_Matrix.cpp b/paddle/math/tests/test_Matrix.cpp
index 6899769144..a4084bdf7c 100644
--- a/paddle/math/tests/test_Matrix.cpp
+++ b/paddle/math/tests/test_Matrix.cpp
@@ -224,10 +224,11 @@ void testParamReluBackwardW(int height, int width, int w_height, int w_width) {
 }
 
 TEST(Matrix, paramRelu) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
+  for (auto height : {10, 40, 100}) {
+    for (auto width : {10, 40, 100}) {
       for (auto w_height : {1, 2}) {
         for (auto w_width : {1, 2}) {
+          if (width % (w_height * w_width)) continue;
           testParamReluForward(height, width, w_height, w_width);
           testParamReluBackwardW(height, width, w_height, w_width);
         }
diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp
index 3a780d26c0..f0c49791d7 100644
--- a/paddle/math/tests/test_matrixCompare.cpp
+++ b/paddle/math/tests/test_matrixCompare.cpp
@@ -773,10 +773,11 @@ void testParamReluBackwardDiff(int height,
 }
 
 TEST(Matrix, paramReluBackwardDiff) {
-  for (auto height : {10, 100}) {
-    for (auto width : {10, 100}) {
+  for (auto height : {10, 40, 100}) {
+    for (auto width : {10, 40, 100}) {
       for (auto w_height : {1, 2}) {
         for (auto w_width : {1, 2}) {
+          if (width % (w_height * w_width)) continue;
           testParamReluBackwardDiff(height, width, w_height, w_width);
         }
       }
diff --git a/paddle/setup.py.in b/paddle/setup.py.in
index e3650bf1c0..c79666bc81 100644
--- a/paddle/setup.py.in
+++ b/paddle/setup.py.in
@@ -70,6 +70,6 @@ setup(name="py_paddle",
       include_dirs = include_dirs,
       install_requires = [
         'numpy>=1.8.0',      # The numpy is required.
-        'protobuf>=2.4.1'    # The paddle protobuf version
+        'protobuf>=3.0.0'    # The paddle protobuf version
       ],
 )