From 0802197924d884c7d8a9531c541d9d4e4f376885 Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Wed, 2 Aug 2017 16:00:06 -0700 Subject: [PATCH 01/55] gather and scatter-update added --- paddle/operators/gather_func.h | 114 ++++++++++++++++++++++++++++++ paddle/operators/scatter_func.h | 119 ++++++++++++++++++++++++++++++++ 2 files changed, 233 insertions(+) create mode 100644 paddle/operators/gather_func.h create mode 100644 paddle/operators/scatter_func.h diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h new file mode 100644 index 0000000000..09e751ce17 --- /dev/null +++ b/paddle/operators/gather_func.h @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" +#include "paddle/framework/ddim.h" + +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[Index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +Tensor* Gather_func(Tensor* Src, Tensor* Index) { + // assert index is an int-type tensor? 
+ // assert(Index->istype(int)); + + // check index of shape 1-D + assert(Index->dims().size()==1); + int index_size = Index->dims()[0]; + + // Source shape + auto src_dims = Src->dims(); + DDim output_dims(dims_src); + // Create a tensor of shape [index_size, dim_src[1:]] + output_dims[0] = index_size; + + Tensor* New_tensor; + float* output = nullptr; + + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; + + /* Gathering */ + if (place == CPUPlace()) { + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather(Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather(d, Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); + } + return New_tensor; +} + +/* Implementation of CPU copy */ +template +void CPUGather(const T* params, const int* indices, + const int slice_size, const int index_size, + T* output) { + const size_t slice_bytes = slice_size * sizeof(T); + + for(int i = 0; i < index_size; ++i) + int index_ = indices[i]; + /* copy src[index_] to output[i] */ + memcpy(output + i * slice_bytes, + params + index_ * slice_bytes, + slice_bytes); +} + +/* Implementation of GPU copy: + I suppose the GPUDevice& d, contains gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); +*/ +template +void GPUGather(const GPUDevice& d, + const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + GatherOpKernel + <<>>( + src, Index, output, slice_size, + indices_size, slice_size, out_size); +} + +template +__global__ void GatherOpKernel(const T* params, const int* indices, T* out, + int64 indices_size, + int64 slice_size, int64 out_size) { + /* I suppose we have the following macro, + which I 
strongly suggest that we should put in cuda: + #define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + */ + CUDA_1D_KERNEL_LOOP(i, out_size) { + int indices_i = i / slice_size; + int slice_i = i - indices_i * slice_size; // offset inside the slice + int gather_i = indices[indices_i]; + int params_i = gather_i * slice_size + slice_i; + out[i] = *(params + params_i); + } +} diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h new file mode 100644 index 0000000000..6ee3fdf3a3 --- /dev/null +++ b/paddle/operators/scatter_func.h @@ -0,0 +1,119 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +#include +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" +#include "paddle/framework/ddim.h" + +/** + * Return a updated tensor from source tensor, scattered according to index: + * dst[i] += src[index[i]] + * input[src]: type-T source Tensor + * input[Index]: type-int index Tensor (1-D) + * return: output tensor + */ +template +void ScatterUpdate_func(Tensor* Src, Tensor* Dst, Tensor* Index) { + // assert index is an int-type tensor + assert(Index->istype(int)); + + // Source shape + auto src_dims = Src->dims(); + auto dst_dims = Dst->dims(); + DDim output_dims(dims_src); + + // check Src shape and Dst shape should match + for(int i = 1; i < src_dims.size(); i++) + assert(src_dims[i]==dst_dims[i]); + + int index_size = Index->dims()[0]; + + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; + + if (place == CPUPlace()) { + // init + output = new_tensor.mutable_data(output_dims, CPUPlace()); + CPUScatterUpdate(src->data(), index->data(), slice_size, new_tensor->mutable_data()); + + } else { // GPU + // init + output = new_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUScatterUpdate(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } +} + +/* Implementation of CPU copy */ +template +void CPUScatterUpdate(const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + //const size_t slice_bytes = slice_size * sizeof(T); + + for(int i = 0; i < index_size; ++i) + int index_ = index[i]; + /* dst[index_] += src[index_] + add operation size: slice_size + */ + math::vAdd(slice_size, src + index_ * slice_bytes, + output + i * slice_bytes, + output + i * slice_bytes); + /* Scatter update, not just assign + memcpy(output + i * slice_bytes, + src + index_ * slice_bytes, + slice_bytes); + */ +} + +/* Implementation of GPU scatter: + I suppose the GPUDevice& d, contains 
gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); +*/ +template +void GPUScatterUpdate(const GPUDevice& d, + const T* src, const int* Index, + const int slice_size, const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + ScatterOpKernel + <<>>( + src, Index, output, slice_size, + indices_size, slice_size, out_size); +} + +template +__global__ void ScatterOpKernel(const T* params, const int* indices, T* out, + int64 indices_size, + int64 slice_size, int64 out_size) { + /* I suppose we have the following macro, + which I strongly suggest that we should put in cuda: + #define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ + i += blockDim.x * gridDim.x) + */ + CUDA_1D_KERNEL_LOOP(i, out_size) { + int indices_i = i / slice_size; + int slice_i = i - indices_i * slice_size; // offset inside the slice + int scatter_i = indices[indices_i]; + int params_i = scatter_i * slice_size + slice_i; + out[i] += *(params + params_i); + } +} From 2b35fca18f66e5f92315e369a687a5e908aedf1e Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Wed, 2 Aug 2017 22:34:58 -0700 Subject: [PATCH 02/55] gather modify --- paddle/operators/gather_func.h | 71 ++++++++++++++++------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index 09e751ce17..e255bd7d15 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -21,44 +21,41 @@ limitations under the License. */ /** * Return a new tensor from source tensor, gathered according to index * input[src]: type-T source Tensor - * input[Index]: type-int index Tensor (1-D) + * input[index]: type-int index Tensor (1-D) * return: output tensor */ -template -Tensor* Gather_func(Tensor* Src, Tensor* Index) { - // assert index is an int-type tensor? 
- // assert(Index->istype(int)); +template +Tensor* Gather(Tensor* src, Tensor* index) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size()==1); + int index_size = index->dims()[0]; - // check index of shape 1-D - assert(Index->dims().size()==1); - int index_size = Index->dims()[0]; + // Source shape + auto src_dims = src->dims(); + DDim output_dims(dims_src); + // Create a tensor of shape [index_size, dim_src[1:]] + output_dims[0] = index_size; - // Source shape - auto src_dims = Src->dims(); - DDim output_dims(dims_src); - // Create a tensor of shape [index_size, dim_src[1:]] - output_dims[0] = index_size; + Tensor* New_tensor; + float* output = nullptr; - Tensor* New_tensor; - float* output = nullptr; + /* slice size */ + int slice_size = 1; + for(unsigned int i = 0; i < src_dims.size(); ++i) + slice_size *= src_dims[i]; - /* slice size */ - int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; - - /* Gathering */ - if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather(Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); - } else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather(d, Src->data(), Index->data(), slice_size, new_tensor->mutable_data()); - } - return New_tensor; + /* Gathering */ + if (place == CPUPlace()) { + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather(src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } + return New_tensor; } /* Implementation of CPU copy */ @@ -82,15 +79,15 @@ void CPUGather(const T* params, const int* indices, */ template void 
GPUGather(const GPUDevice& d, - const T* src, const int* Index, + const T* src, const int* index, const int slice_size, const int index_size, T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; + int block_count = slice_size * index_size; + int thread_per_block = 1024; - GatherOpKernel + GatherOpKernel <<>>( - src, Index, output, slice_size, + src, index, output, slice_size, indices_size, slice_size, out_size); } From eef55ca700a4f75e76996bbab04224470bb80f36 Mon Sep 17 00:00:00 2001 From: Zhuoyuan Date: Thu, 3 Aug 2017 01:02:40 -0700 Subject: [PATCH 03/55] remodify --- paddle/operators/gather_func.h | 76 ++++++++++-------- paddle/operators/scatter_func.h | 137 ++++++++++++++++---------------- 2 files changed, 108 insertions(+), 105 deletions(-) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index e255bd7d15..5975675cbb 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once #include +#include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -#include "paddle/framework/ddim.h" /** * Return a new tensor from source tensor, gathered according to index @@ -27,7 +27,7 @@ limitations under the License. 
*/ template Tensor* Gather(Tensor* src, Tensor* index) { // check index of shape 1-D - PADDLE_ENFORCE(index->dims().size()==1); + PADDLE_ENFORCE(index->dims().size() == 1); int index_size = index->dims()[0]; // Source shape @@ -41,61 +41,67 @@ Tensor* Gather(Tensor* src, Tensor* index) { /* slice size */ int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; + for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; /* Gathering */ if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather(src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + // init for CPU + output = New_tensor.mutable_data(output_dims, CPUPlace()); + CPUGather( + src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } else { // GPU + // init for GPU + output = New_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUGather( + d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); } return New_tensor; } /* Implementation of CPU copy */ -template -void CPUGather(const T* params, const int* indices, - const int slice_size, const int index_size, - T* output) { +template +void CPUGather(const T* params, + const int* indices, + const int slice_size, + const int index_size, + T* output) { const size_t slice_bytes = slice_size * sizeof(T); - for(int i = 0; i < index_size; ++i) - int index_ = indices[i]; - /* copy src[index_] to output[i] */ - memcpy(output + i * slice_bytes, - params + index_ * slice_bytes, - slice_bytes); + for (size_t i = 0; i < index_size; ++i) { + int index_ = indices[i]; + /* copy src[index_] to output[i] */ + memcpy( + output + i * slice_bytes, params + index_ * 
slice_bytes, slice_bytes); + } } /* Implementation of GPU copy: I suppose the GPUDevice& d, contains gpu_id and thread_id d = cuda_stream(gpu_id_, stream_id_); */ -template +template void GPUGather(const GPUDevice& d, - const T* src, const int* index, - const int slice_size, const int index_size, - T* output) { + const T* src, + const int* index, + const int slice_size, + const int index_size, + T* output) { int block_count = slice_size * index_size; int thread_per_block = 1024; - GatherOpKernel - <<>>( - src, index, output, slice_size, - indices_size, slice_size, out_size); + GatherOpKernel<<>>( + src, index, output, slice_size, indices_size, slice_size, out_size); } template -__global__ void GatherOpKernel(const T* params, const int* indices, T* out, +__global__ void GatherOpKernel(const T* params, + const int* indices, + T* out, int64 indices_size, - int64 slice_size, int64 out_size) { - /* I suppose we have the following macro, + int64 slice_size, + int64 out_size) { + /* I suppose we have the following macro, which I strongly suggest that we should put in cuda: #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ @@ -103,9 +109,9 @@ __global__ void GatherOpKernel(const T* params, const int* indices, T* out, */ CUDA_1D_KERNEL_LOOP(i, out_size) { int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice + int slice_i = i - indices_i * slice_size; // offset inside the slice int gather_i = indices[indices_i]; int params_i = gather_i * slice_size + slice_i; out[i] = *(params + params_i); - } + } } diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h index 6ee3fdf3a3..53b260170f 100644 --- a/paddle/operators/scatter_func.h +++ b/paddle/operators/scatter_func.h @@ -14,96 +14,93 @@ limitations under the License. 
*/ #pragma once #include +#include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -#include "paddle/framework/ddim.h" /** * Return a updated tensor from source tensor, scattered according to index: * dst[i] += src[index[i]] * input[src]: type-T source Tensor - * input[Index]: type-int index Tensor (1-D) + * input[index]: type-int index Tensor (1-D) * return: output tensor */ -template -void ScatterUpdate_func(Tensor* Src, Tensor* Dst, Tensor* Index) { - // assert index is an int-type tensor - assert(Index->istype(int)); - - // Source shape - auto src_dims = Src->dims(); - auto dst_dims = Dst->dims(); - DDim output_dims(dims_src); - - // check Src shape and Dst shape should match - for(int i = 1; i < src_dims.size(); i++) - assert(src_dims[i]==dst_dims[i]); - - int index_size = Index->dims()[0]; - - /* slice size */ - int slice_size = 1; - for(unsigned int i = 0; i < src_dims.size(); ++i) - slice_size *= src_dims[i]; - - if (place == CPUPlace()) { - // init - output = new_tensor.mutable_data(output_dims, CPUPlace()); - CPUScatterUpdate(src->data(), index->data(), slice_size, new_tensor->mutable_data()); - - } else { // GPU - // init - output = new_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUScatterUpdate(d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } +template +void ScatterUpdate(Tensor* src, Tensor* dst, Tensor* index) { + // Source shape + auto src_dims = src->dims(); + auto dst_dims = dst->dims(); + DDim output_dims(dims_src); + + // check src shape and dst shape should match + for (size_t i = 1; i < src_dims.size(); i++) + PADDLE_ENFORCE(src_dims[i] == dst_dims[i]); + + int index_size = index->dims()[0]; + + /* slice size */ + int slice_size = 1; + for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + if (place == CPUPlace()) { + // init + output = new_tensor.mutable_data(output_dims, CPUPlace()); + CPUScatterUpdate( + 
src->data(), index->data(), slice_size, new_tensor->mutable_data()); + + } else { // GPU + // init + output = new_tensor.mutable_data(output_dims, GPUPlace()); + /* how to specialize device??*/ + GPUScatterUpdate( + d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); + } } /* Implementation of CPU copy */ -template -void CPUScatterUpdate(const T* src, const int* Index, - const int slice_size, const int index_size, - T* output) { - //const size_t slice_bytes = slice_size * sizeof(T); - - for(int i = 0; i < index_size; ++i) - int index_ = index[i]; - /* dst[index_] += src[index_] - add operation size: slice_size - */ - math::vAdd(slice_size, src + index_ * slice_bytes, - output + i * slice_bytes, - output + i * slice_bytes); - /* Scatter update, not just assign - memcpy(output + i * slice_bytes, - src + index_ * slice_bytes, - slice_bytes); - */ +template +void CPUScatterUpdate(const T* src, + const int* index, + const int slice_size, + const int index_size, + T* output) { + // const size_t slice_bytes = slice_size * sizeof(T); + + for (size_t i = 0; i < index_size; ++i) { + int index_ = index[i]; + math::vAdd(slice_size, + src + index_ * slice_bytes, + output + i * slice_bytes, + output + i * slice_bytes); + } } /* Implementation of GPU scatter: I suppose the GPUDevice& d, contains gpu_id and thread_id d = cuda_stream(gpu_id_, stream_id_); */ -template +template void GPUScatterUpdate(const GPUDevice& d, - const T* src, const int* Index, - const int slice_size, const int index_size, - T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; - - ScatterOpKernel - <<>>( - src, Index, output, slice_size, - indices_size, slice_size, out_size); + const T* src, + const int* index, + const int slice_size, + const int index_size, + T* output) { + int block_count = slice_size * index_size; + int thread_per_block = 1024; + + ScatterOpKernel<<>>( + src, index, output, slice_size, indices_size, slice_size, out_size); } template 
-__global__ void ScatterOpKernel(const T* params, const int* indices, T* out, - int64 indices_size, - int64 slice_size, int64 out_size) { - /* I suppose we have the following macro, +__global__ void ScatterOpKernel(const T* params, + const int* indices, + T* out, + int64 indices_size, + int64 slice_size, + int64 out_size) { + /* I suppose we have the following macro, which I strongly suggest that we should put in cuda: #define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ @@ -111,9 +108,9 @@ __global__ void ScatterOpKernel(const T* params, const int* indices, T* out, */ CUDA_1D_KERNEL_LOOP(i, out_size) { int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice + int slice_i = i - indices_i * slice_size; // offset inside the slice int scatter_i = indices[indices_i]; int params_i = scatter_i * slice_size + slice_i; out[i] += *(params + params_i); - } + } } From def959a8909c6425ca96c1deec7b00e08ae0df81 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 4 Aug 2017 13:33:50 +0800 Subject: [PATCH 04/55] add mkldnn fc files --- paddle/gserver/CMakeLists.txt | 11 ++++++ paddle/gserver/layers/MkldnnFcLayer.cpp | 30 +++++++++++++++++ paddle/gserver/layers/MkldnnFcLayer.h | 42 +++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 45 +++++++++++++++++++++++++ 4 files changed, 128 insertions(+) create mode 100644 paddle/gserver/layers/MkldnnFcLayer.cpp create mode 100644 paddle/gserver/layers/MkldnnFcLayer.h create mode 100644 paddle/gserver/layers/MkldnnLayer.h diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 0012636b8f..1305d5438a 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -23,6 +23,17 @@ endmacro() filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) + +if(NOT WITH_MKLDNN) + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE 
"${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.cpp") + list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) + list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) + message(STATUS "Skip compiling with Mkldnnlayers and MkldnnActivations") +else() + message(STATUS "Compile with Mkldnnlayers and MkldnnActivations") +endif() + if(NOT WITH_GPU) list(REMOVE_ITEM GSERVER_HEADER layers/CudnnConvBaseLayer.h diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp new file mode 100644 index 0000000000..f8220a2553 --- /dev/null +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -0,0 +1,30 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MkldnnFcLayer.h" + +namespace paddle { + +REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); + +bool MkldnnFcLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return MkldnnLayer::init(layerMap, parameterMap); +} + +void MkldnnFcLayer::forward(PassType passType) {} + +void MkldnnFcLayer::backward(const UpdateCallback& callback) {} + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h new file mode 100644 index 0000000000..430567949d --- /dev/null +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "MkldnnLayer.h" +#include "mkldnn.hpp" + +namespace paddle { + +/** + * @brief A subclass of MkldnnLayer fc layer. + * + * The config file api is mkldnn_fc + */ +class MkldnnFcLayer : public MkldnnLayer { +protected: +public: + explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} + + ~MkldnnFcLayer() {} + + bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) override; + + void forward(PassType passType) override; + + void backward(const UpdateCallback& callback) override; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h new file mode 100644 index 0000000000..e9bab68b07 --- /dev/null +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -0,0 +1,45 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include "Layer.h" +#include "mkldnn.hpp" + +namespace paddle { + +class MkldnnLayer; +typedef std::shared_ptr MkldnnLayerPtr; + +/** + * @brief Base class of Mkldnnlayer. + * + */ +class MkldnnLayer : public Layer { +public: + explicit MkldnnLayer(const LayerConfig& config) : Layer(config) {} + + ~MkldnnLayer() {} + + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + return Layer::init(layerMap, parameterMap); + // TODO(TJ): deivecId + } + + void resetOutput(size_t height, size_t width) { ; } +}; + +} // namespace paddle From 3c3a11a0dc780498a7c890be90b9df922b426d90 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Fri, 4 Aug 2017 13:50:41 +0800 Subject: [PATCH 05/55] add use_mkldnn flag --- paddle/gserver/layers/MkldnnLayer.h | 4 +++- paddle/trainer/TrainerConfigHelper.cpp | 2 ++ paddle/utils/Flags.cpp | 7 +++++++ paddle/utils/Flags.h | 1 + python/paddle/trainer/config_parser.py | 24 +++++++++++++++++++++--- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e9bab68b07..7e6d88b273 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -35,8 +35,10 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - return Layer::init(layerMap, parameterMap); + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
+ << "Please set WITH_MKLDNN=ON"; // TODO(TJ): deivecId + return Layer::init(layerMap, parameterMap); } void resetOutput(size_t height, size_t width) { ; } diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index 133e2be104..a0a365aa0b 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -28,6 +28,7 @@ DECLARE_bool(with_cost); DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); +DECLARE_bool(use_mkldnn); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -44,6 +45,7 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) configArgs << "trainer_id=" << FLAGS_trainer_id << ",local=" << FLAGS_local << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn + << ",use_mkldnn=" << FLAGS_use_mkldnn << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index 320f671ed9..ab1c181c62 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -20,6 +20,13 @@ DEFINE_bool(use_gpu, false, "Only support CPU training"); DEFINE_bool(use_gpu, true, "Whether to use GPU for training"); #endif +#ifdef PADDLE_USE_MKLDNN +// TODO(TJ): change to true when MKLDNN layers support multi-inputs +DEFINE_bool(use_mkldnn, false, "Default still keep use CPU training"); +#else +DEFINE_bool(use_mkldnn, false, "Only support CPU training"); +#endif + DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." 
diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index dc4faef833..1832bb515e 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -40,3 +40,4 @@ DECLARE_bool(show_layer_stat); DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); +DECLARE_bool(use_mkldnn); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 9ea69fc5e5..ae39abc081 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1604,6 +1604,8 @@ class MultiClassCrossEntropySelfNormCostLayer(LayerBase): @config_layer('fc') class FCLayer(LayerBase): + layer_type = 'fc' + def __init__(self, name, size, @@ -1611,14 +1613,25 @@ class FCLayer(LayerBase): bias=True, error_clipping_threshold=None, **xargs): - super(FCLayer, self).__init__(name, 'fc', size, inputs=inputs, **xargs) + use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + if use_mkldnn: + self.layer_type = 'mkldnn_fc' + config_assert( + len(inputs) == 1, + "MkldnnFCLayer support one and only one input!") + super(FCLayer, self).__init__( + name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size - dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" - + if use_mkldnn: + dims = [self.config.size, input_layer.size] + config_assert(not sparse, + "MkldnnFCLayer do not support sparse format yet") + else: + dims = [input_layer.size, self.config.size] if sparse: psize = self.inputs[input_index].nnz else: @@ -1631,6 +1644,11 @@ class FCLayer(LayerBase): self.config.error_clipping_threshold = error_clipping_threshold +@config_layer('mkldnn_fc') +class MkldnnFcLayer(FCLayer): + layer_type = 'mkldnn_fc' + + @config_layer('selective_fc') class SelectiveFCLayer(LayerBase): def __init__(self, From 
6b3e0b786d9de3ef912953859e23204066aa70a4 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Sat, 5 Aug 2017 15:05:51 -0700 Subject: [PATCH 06/55] gather function with test passed --- paddle/operators/CMakeLists.txt | 5 ++ paddle/operators/gather_func.cc | 19 +++++ paddle/operators/gather_func.h | 124 ++++++++++++++------------------ paddle/operators/gather_test.cc | 50 +++++++++++++ 4 files changed, 126 insertions(+), 72 deletions(-) create mode 100644 paddle/operators/gather_func.cc create mode 100644 paddle/operators/gather_test.cc diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index b910bee836..10922892ca 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,6 +41,11 @@ function(op_library TARGET) endif() endfunction() +op_library(gather SRCS gather_func.cc) +cc_test(gather_test SRCS gather_test.cc DEPS gather) + +op_library(scatter SRCS scatter_func.cc) + op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test SRCS add_op_test.cc DEPS add_op) diff --git a/paddle/operators/gather_func.cc b/paddle/operators/gather_func.cc new file mode 100644 index 0000000000..a6b2331f32 --- /dev/null +++ b/paddle/operators/gather_func.cc @@ -0,0 +1,19 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/operators/gather_func.h" +#include +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/platform/place.h" diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather_func.h index 5975675cbb..5adc1e6b17 100644 --- a/paddle/operators/gather_func.h +++ b/paddle/operators/gather_func.h @@ -4,7 +4,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -13,51 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include + #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -/** - * Return a new tensor from source tensor, gathered according to index - * input[src]: type-T source Tensor - * input[index]: type-int index Tensor (1-D) - * return: output tensor - */ -template -Tensor* Gather(Tensor* src, Tensor* index) { - // check index of shape 1-D - PADDLE_ENFORCE(index->dims().size() == 1); - int index_size = index->dims()[0]; - - // Source shape - auto src_dims = src->dims(); - DDim output_dims(dims_src); - // Create a tensor of shape [index_size, dim_src[1:]] - output_dims[0] = index_size; - - Tensor* New_tensor; - float* output = nullptr; - - /* slice size */ - int slice_size = 1; - for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; +using paddle::framework::Tensor; +using paddle::framework::DDim; - /* Gathering */ - if (place == CPUPlace()) { - // init for CPU - output = New_tensor.mutable_data(output_dims, CPUPlace()); - CPUGather( - src->data(), index->data(), slice_size, new_tensor->mutable_data()); - 
} else { // GPU - // init for GPU - output = New_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUGather( - d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } - return New_tensor; -} +namespace paddle { +namespace operators { /* Implementation of CPU copy */ template @@ -70,48 +37,61 @@ void CPUGather(const T* params, for (size_t i = 0; i < index_size; ++i) { int index_ = indices[i]; - /* copy src[index_] to output[i] */ - memcpy( - output + i * slice_bytes, params + index_ * slice_bytes, slice_bytes); + // copy src[index_] to output[i] + memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); } } /* Implementation of GPU copy: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); + I suppose the GPUDevice& d, contains gpu_id and thread_id + d = cuda_stream(gpu_id_, stream_id_); */ template -void GPUGather(const GPUDevice& d, - const T* src, +void GPUGather(const T* src, const int* index, const int slice_size, const int index_size, - T* output) { - int block_count = slice_size * index_size; - int thread_per_block = 1024; - - GatherOpKernel<<>>( - src, index, output, slice_size, indices_size, slice_size, out_size); -} + T* output); +/** + * Return a new tensor from source tensor, gathered according to index + * input[src]: type-T source Tensor + * input[index]: type-int index Tensor (1-D) + * return: output tensor + */ template -__global__ void GatherOpKernel(const T* params, - const int* indices, - T* out, - int64 indices_size, - int64 slice_size, - int64 out_size) { - /* I suppose we have the following macro, - which I strongly suggest that we should put in cuda: - #define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - */ - CUDA_1D_KERNEL_LOOP(i, out_size) { - int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice - 
int gather_i = indices[indices_i]; - int params_i = gather_i * slice_size + slice_i; - out[i] = *(params + params_i); +void Gather(const platform::Place& place, + const paddle::framework::Tensor* src, + const paddle::framework::Tensor* index, + paddle::framework::Tensor* output) { + // check index of shape 1-D + PADDLE_ENFORCE(index->dims().size() == 1); + int index_size = index->dims()[0]; + + auto src_dims = src->dims(); + DDim output_dims(src_dims); + output_dims[0] = index_size; + + // slice size + int slice_size = 1; + for (size_t i = 1; i < src_dims.size(); ++i) slice_size *= src_dims[i]; + + // Gathering + if (platform::is_cpu_place(place)) { + CPUGather(src->data(), + index->data(), + slice_size, + index_size, + output->data()); + } else { + // init for GPU + // output_arr = output->mutable_data(output_dims, platform::GPUPlace()); + // how to specialize device?? + // GPUGather( + // d, src->data(), index->data(), slice_size, + // new_tensor->mutable_data()); } } + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc new file mode 100644 index 0000000000..6f220b133b --- /dev/null +++ b/paddle/operators/gather_test.cc @@ -0,0 +1,50 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/framework/ddim.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/gather_func.h" +#include "paddle/platform/place.h" + +#include +#include +#include + +TEST(_abc_, GatherData) { + using namespace paddle::framework; + using namespace paddle::platform; + using namespace paddle::operators; + + Tensor* src = new Tensor(); + Tensor* index = new Tensor(); + Tensor* output = new Tensor(); + // src.Resize(make_ddim({3, 4})); + + int* p_src = nullptr; + int* p_index = nullptr; + p_src = src->mutable_data(make_ddim({3, 4}), CPUPlace()); + p_index = index->mutable_data(make_ddim({2}), CPUPlace()); + + for (size_t i = 0; i < 12; ++i) p_src[i] = i; + p_index[0] = 1; + p_index[1] = 0; + + // gather + int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); + + Gather(CPUPlace(), src, index, output); + + for (size_t i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); + for (size_t i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); +} From 94b172a7e8a0abb93129ec6b85758779c8dc7596 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 18:08:17 +0800 Subject: [PATCH 07/55] fix mkldnn lib bug, and mkldnnbase --- CMakeLists.txt | 2 +- paddle/gserver/layers/MkldnnBase.h | 99 +++++++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 1 + 3 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 paddle/gserver/layers/MkldnnBase.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..db9ff86baf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,7 @@ if(WITH_GPU) endif(WITH_GPU) if(WITH_MKLDNN) - list(APPEND EXTERNAL_LIBS ${MKLDNN_LIBRARY} ${MKLDNN_IOMP_LIB}) + list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB} ${MKLDNN_IOMP_LIB}) endif() if(USE_NNPACK) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h new file mode 100644 index 0000000000..eba72e58e5 --- /dev/null +++ b/paddle/gserver/layers/MkldnnBase.h @@ -0,0 +1,99 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. 
All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "mkldnn.hpp" + +namespace paddle { + +typedef enum { + DNN_BASE = 1, + DNN_TESTS = 1, + DNN_SIZES, + DNN_FMTS, + DNN_TESTS_DETAILS, + DNN_TESTS_MORE, + DNN_ALL, +} DNN_LOG_LEVEL; + +/** + * @brief MKLDNN CPU engine. + * + */ +class CpuEngine { +public: + static CpuEngine& Instance() { + // Thread-safe in C++11. + static CpuEngine myInstance; + return myInstance; + } + + // Disallow copy or move + CpuEngine(const CpuEngine&) = delete; // Copy constructor + CpuEngine(CpuEngine&&) = delete; // Move constructor + CpuEngine& operator=(const CpuEngine&) = delete; // Copy assignment + CpuEngine& operator=(CpuEngine&&) = delete; // Move assignment + + mkldnn::engine& getEngine() { return cpuEngine_; } + +protected: + CpuEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CpuEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CpuEngine() {} + +private: + mkldnn::engine cpuEngine_; +}; + +/** + * @brief MKLDNN Stream. 
+ * + */ +class MkldnnStream { +public: + MkldnnStream() : ready_(false) { resetState(); } + + virtual ~MkldnnStream() {} + + /** + * @brief Submit stream + * @param prims The primitives vector + * block Waiting for the stream to complete + */ + void submit(std::vector& prims, bool block = true) { + resetState(); + stream_->submit(prims).wait(block); + ready_ = false; + } + + /** + * @brief Reset the mkldnn stream + */ + void resetState() { + if (ready_) { + return; + } + // TODO(TJ): change me when mkldnn have method to reset this state + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); + // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + ready_ = true; + } + +private: + bool ready_; + std::shared_ptr stream_; +}; + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index 7e6d88b273..e69c9d6a1a 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include "Layer.h" +#include "MkldnnBase.h" #include "mkldnn.hpp" namespace paddle { From 90d5be74176bd7b69ce9494ebffae38f7323d639 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 22:14:26 +0800 Subject: [PATCH 08/55] add mkldnn fc forward --- paddle/gserver/layers/MkldnnFcLayer.cpp | 78 +++++++++++++++++++- paddle/gserver/layers/MkldnnFcLayer.h | 9 +++ paddle/gserver/layers/MkldnnLayer.cpp | 98 +++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 63 +++++++++++++--- 4 files changed, 236 insertions(+), 12 deletions(-) create mode 100644 paddle/gserver/layers/MkldnnLayer.cpp diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index f8220a2553..5584b43ff1 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "MkldnnFcLayer.h" +#include "paddle/utils/Stat.h" namespace paddle { @@ -20,11 +21,82 @@ REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); bool MkldnnFcLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - return MkldnnLayer::init(layerMap, parameterMap); + if (!MkldnnLayer::init(layerMap, parameterMap)) { + return false; + } + + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet!"; + CHECK_EQ(inputLayers_.size(), parameters_.size()); + CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; + + // output size, cat not be changed + oc_ = getSize(); + oh_ = 1; + ow_ = 1; + + // input size can not change in FC + iLayerSize_ = inputLayers_[0]->getSize(); + CHECK_EQ(parameters_[0]->getSize(), iLayerSize_ * oc_); + + // create weight + weight_ = + std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + + // create biases + if (biasParameter_.get() != NULL) { + biases_ = std::unique_ptr(new Weight(1, oc_, biasParameter_)); + } + return true; +} + +void MkldnnFcLayer::reshape() { + const Argument& input = getInput(0); + int batchSize = input.getBatchSize(); + if (bs_ == batchSize) { + return; + } + bs_ = batchSize; + ih_ = input.getFrameHeight(); + iw_ = input.getFrameWidth(); + if (ih_ == 0) { + ih_ = 1; + } + if (iw_ == 0) { + iw_ = 1; + } + CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); + ic_ = iLayerSize_ / (ih_ * iw_); + CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; + CHECK_EQ(size_t(oc_), getSize()); + + // reset output + output_.setFrameHeight(oh_); + output_.setFrameWidth(ow_); + resetOutput(bs_, oc_); } -void MkldnnFcLayer::forward(PassType passType) {} +void MkldnnFcLayer::forward(PassType passType) { + Layer::forward(passType); + + reshape(); -void MkldnnFcLayer::backward(const UpdateCallback& callback) {} + { + REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); + real* input = getInputValue(0)->getData(); + real* output = getOutputValue()->getData(); + real* 
wgt = weight_->getW()->getData(); + bool hasBias = biases_ && biases_->getW(); + real* bias = hasBias ? biases_->getW()->getData() : NULL; + mkldnnForwardFC(bs_, ic_, ih_, iw_, input, oc_, output, wgt, bias); + } + /* activation */ { + REGISTER_TIMER_INFO("FwActTimer", getName().c_str()); + forwardActivation(); + } +} + +void MkldnnFcLayer::backward(const UpdateCallback& callback) { + ; // bool hasBias = biases_ && biases_->getWGrad(); +} } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 430567949d..6167702771 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -26,6 +26,13 @@ namespace paddle { */ class MkldnnFcLayer : public MkldnnLayer { protected: + // input layer size, can not be change after init + size_t iLayerSize_; // == ic * ih * iw + + // fc weight and bias + std::unique_ptr weight_; + std::unique_ptr biases_; + public: explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} @@ -34,6 +41,8 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; + void reshape(); + void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp new file mode 100644 index 0000000000..d462e8694c --- /dev/null +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -0,0 +1,98 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +#include "MkldnnLayer.h" + +// using namespace mkldnn; // NOLINT +using mem = mkldnn::memory; // NOLINT +typedef mem::format format; +typedef mkldnn::inner_product_forward fc_fwd; +typedef mkldnn::inner_product_backward_weights fc_bwdWgt; +typedef mkldnn::inner_product_backward_data fc_bwdData; + +namespace paddle { + +bool MkldnnLayer::init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." + << "Please set WITH_MKLDNN=ON"; + // TODO(TJ): deivecId + return Layer::init(layerMap, parameterMap); +} + +void MkldnnLayer::resetForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData) { + bool hasSpatial = ih == 1 && iw == 1 ? false : true; + engine_ = CpuEngine::Instance().getEngine(); + + mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) + : createMD({bs, ic}, format::nc); + mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) + : createMD({oc, ic}, format::oi); + mem::desc biasMD = biasData != NULL ? createMD({oc}, format::x) + : createMD({}, format::format_undef); + mem::desc topMD = createMD({bs, oc}, format::nc); + + mkldnn::prop_kind pk = mkldnn::prop_kind::forward; + fc_fwd::desc fwdDesc = biasData != NULL + ? 
fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) + : fc_fwd::desc(pk, botMD, wgtMD, topMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + mem bot = mem(mem::primitive_desc(botMD, engine_), botData); + mem wgt = mem(mem::primitive_desc(wgtMD, engine_), wgtData); + mem top = mem(mem::primitive_desc(topMD, engine_), topData); + + if (biasData != NULL) { + mem bias = mem(mem::primitive_desc(biasMD, engine_), biasData); + fwd_.reset(new fc_fwd(fwdPD, bot, wgt, bias, top)); + } else { + fwd_.reset(new fc_fwd(fwdPD, bot, wgt, top)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MkldnnLayer::mkldnnForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData) { + // if input size changed, reset it + resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); + + // just forward + // update botdata + stream_->submit(pipelineFwd_); +} + +mem::desc MkldnnLayer::createMD(mem::dims dims, + mem::format fmt, + mem::data_type type) { + // TODO(TJ): isFmtSuppoted(fmt) + return mem::desc(dims, type, fmt); +} + +} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e69c9d6a1a..6e41ee4028 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -29,20 +29,65 @@ typedef std::shared_ptr MkldnnLayerPtr; * */ class MkldnnLayer : public Layer { +protected: + // batch size + int bs_; + // input image channel, height and width + int ic_, ih_, iw_; + // output image channel, height and width + int oc_, oh_, ow_; + + // mkldnn engine, stream and primivtives + mkldnn::engine engine_; + std::shared_ptr stream_; + + std::shared_ptr fwd_; + std::vector pipelineFwd_; + std::vector pipelineBwd_; + public: - explicit MkldnnLayer(const LayerConfig& config) : Layer(config) {} + explicit MkldnnLayer(const LayerConfig& config) + : Layer(config), + bs_(0), + ic_(0), + ih_(0), + 
iw_(0), + oc_(0), + oh_(0), + ow_(0), + engine_(mkldnn::engine::cpu, 0), + stream_(nullptr) {} ~MkldnnLayer() {} - virtual bool init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." - << "Please set WITH_MKLDNN=ON"; - // TODO(TJ): deivecId - return Layer::init(layerMap, parameterMap); - } + virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + + void resetForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData); + + void mkldnnForwardFC(int bs, + int ic, + int ih, + int iw, + real* botData, + int oc, + real* topData, + real* wgtData, + real* biasData); - void resetOutput(size_t height, size_t width) { ; } + // TODO(TJ): move to MkldnnMatrix + // create memory desc + inline mkldnn::memory::desc createMD( + mkldnn::memory::dims dims, + mkldnn::memory::format fmt, + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32); }; } // namespace paddle From 1203ebc498b7c11e69d6aa4613a8a823ecfa01e1 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Sun, 6 Aug 2017 23:40:38 +0800 Subject: [PATCH 09/55] add mkldnn fc backward --- paddle/gserver/layers/MkldnnFcLayer.cpp | 37 ++++++++++- paddle/gserver/layers/MkldnnLayer.cpp | 88 +++++++++++++++++++++++++ paddle/gserver/layers/MkldnnLayer.h | 31 ++++++++- 3 files changed, 153 insertions(+), 3 deletions(-) diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index 5584b43ff1..b62422da83 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -77,7 +77,6 @@ void MkldnnFcLayer::reshape() { void MkldnnFcLayer::forward(PassType passType) { Layer::forward(passType); - reshape(); { @@ -97,6 +96,40 @@ void MkldnnFcLayer::forward(PassType passType) { } void MkldnnFcLayer::backward(const UpdateCallback& callback) { - ; // bool hasBias = biases_ && biases_->getWGrad(); 
+ /* Do derivation */ { + REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); + backwardActivation(); + } + + bool hasBias = biases_ && biases_->getWGrad(); + { + REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); + real* inVal = getInputValue(0)->getData(); + real* inGrad = + getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* outGrad = getOutputGrad()->getData(); + real* wgtGrad = weight_->getWGrad()->getData(); + real* wgtVal = weight_->getW()->getData(); + real* biasGrad = hasBias ? biases_->getWGrad()->getData() : NULL; + mkldnnBackwardFC(bs_, + ic_, + ih_, + iw_, + inGrad, + inVal, + oc_, + outGrad, + wgtGrad, + wgtVal, + biasGrad); + } + + { + REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); + weight_->getParameterPtr()->incUpdate(callback); + if (hasBias) { + biases_->getParameterPtr()->incUpdate(callback); + } + } } } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index d462e8694c..64bed5c821 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -88,6 +88,94 @@ void MkldnnLayer::mkldnnForwardFC(int bs, stream_->submit(pipelineFwd_); } +void MkldnnLayer::resetBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff) { + bool hasSpatial = ih == 1 && iw == 1 ? false : true; + engine_ = CpuEngine::Instance().getEngine(); + + // backward weight + mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) + : createMD({bs, ic}, format::nc); + mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) + : createMD({oc, ic}, format::oi); + mem::desc topMD = createMD({bs, oc}, format::nc); + mem::desc biasMD = biasDiff != NULL ? 
createMD({oc}, format::x) + : createMD({}, format::format_undef); + + fc_fwd::desc fwdDesc = + fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = + biasDiff != NULL ? fc_bwdWgt::desc(botMD, wgtMD, biasMD, topMD) + : fc_bwdWgt::desc(botMD, wgtMD, topMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + mem botVal = mem(mem::primitive_desc(botMD, engine_), botData); + mem wgtGrad = mem(mem::primitive_desc(wgtMD, engine_), wgtDiff); + mem topGrad = mem(mem::primitive_desc(topMD, engine_), topDiff); + + if (biasDiff != NULL) { + mem biasGrad = mem(mem::primitive_desc(biasMD, engine_), biasDiff); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad, biasGrad)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + // backward data + if (botDiff == NULL) { + return; + } + + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + mem botGrad = mem(mem::primitive_desc(botMD, engine_), botDiff); + mem wgtVal = mem(mem::primitive_desc(wgtMD, engine_), wgtData); + bwdData_.reset(new fc_bwdData(bwdDataPD, topGrad, wgtVal, botGrad)); + pipelineBwd_.push_back(*bwdData_); +} + +void MkldnnLayer::mkldnnBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff) { + // if input size changed, reset it + resetBackwardFC(bs, + ic, + ih, + iw, + botDiff, + botData, + oc, + topDiff, + wgtDiff, + wgtData, + biasDiff); + + // just forward + // update botdata + stream_->submit(pipelineBwd_); +} + mem::desc MkldnnLayer::createMD(mem::dims dims, mem::format fmt, mem::data_type type) { diff --git 
a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index 6e41ee4028..5927bd6d52 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -42,6 +42,8 @@ protected: std::shared_ptr stream_; std::shared_ptr fwd_; + std::shared_ptr bwdWgt_; + std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; @@ -56,7 +58,10 @@ public: oh_(0), ow_(0), engine_(mkldnn::engine::cpu, 0), - stream_(nullptr) {} + stream_(nullptr), + fwd_(nullptr), + bwdWgt_(nullptr), + bwdData_(nullptr) {} ~MkldnnLayer() {} @@ -82,6 +87,30 @@ public: real* wgtData, real* biasData); + void resetBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff); + + void mkldnnBackwardFC(int bs, + int ic, + int ih, + int iw, + real* botDiff, + real* botData, + int oc, + real* topDiff, + real* wgtDiff, + real* wgtData, + real* biasDiff); + // TODO(TJ): move to MkldnnMatrix // create memory desc inline mkldnn::memory::desc createMD( From ec9009f320204531082f81f6cb035292ff3f0f14 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 7 Aug 2017 14:53:02 +0800 Subject: [PATCH 10/55] add mkldnn tester --- paddle/gserver/layers/MkldnnFcLayer.cpp | 18 ++ paddle/gserver/layers/MkldnnFcLayer.h | 2 + paddle/gserver/layers/MkldnnLayer.cpp | 3 +- paddle/gserver/tests/CMakeLists.txt | 9 + paddle/gserver/tests/MkldnnTester.cpp | 381 ++++++++++++++++++++++++ paddle/gserver/tests/MkldnnTester.h | 119 ++++++++ paddle/gserver/tests/test_Mkldnn.cpp | 76 +++++ 7 files changed, 607 insertions(+), 1 deletion(-) create mode 100644 paddle/gserver/tests/MkldnnTester.cpp create mode 100644 paddle/gserver/tests/MkldnnTester.h create mode 100644 paddle/gserver/tests/test_Mkldnn.cpp diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index b62422da83..c3b1f83d7d 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ 
b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "MkldnnFcLayer.h" +#include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" namespace paddle { @@ -41,6 +42,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, // create weight weight_ = std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); + initWgt(); // create biases if (biasParameter_.get() != NULL) { @@ -49,6 +51,22 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } +void MkldnnFcLayer::initWgt() { + // The weight_ is transposed from initial paddle weight + MatrixPtr paddleWgt = Matrix::create( + weight_->getW()->getData(), iLayerSize_, oc_, false, false); + + std::ostringstream ostr; + paddleWgt->print(ostr); + VLOG(DNN_BASE) << ostr.str(); + + // Firstly in mkldnn, the matrix is transposed from initial paddle weight + MatrixPtr paddleWgtT; + paddleWgt->transpose(paddleWgtT, true); + + weight_->getW()->copyFrom(*paddleWgtT); +} + void MkldnnFcLayer::reshape() { const Argument& input = getInput(0); int batchSize = input.getBatchSize(); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 6167702771..4cc445e87b 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -41,6 +41,8 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; + void initWgt(); + void reshape(); void forward(PassType passType) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index 64bed5c821..cead3d87ea 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -26,7 +26,8 @@ namespace paddle { bool MkldnnLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
- << "Please set WITH_MKLDNN=ON"; + << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; // TODO(TJ): deivecId return Layer::init(layerMap, parameterMap); } diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index a43adc7ce7..486456c8b7 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -18,6 +18,15 @@ add_unittest_without_exec(test_LayerGrad add_test(NAME test_LayerGrad COMMAND test_LayerGrad) +########## test_Mkldnn layers and activations ########## +if(WITH_MKLDNN) + add_unittest_without_exec(test_Mkldnn + test_Mkldnn.cpp + MkldnnTester.cpp + LayerGradUtil.cpp) + add_test(NAME test_Mkldnn COMMAND test_Mkldnn) +endif() + ################ test_CRFLayerGrad #################### add_unittest_without_exec(test_CRFLayerGrad test_CRFLayerGrad.cpp diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp new file mode 100644 index 0000000000..38e5bc75be --- /dev/null +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -0,0 +1,381 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "MkldnnTester.h" +#include "paddle/gserver/layers/MkldnnBase.h" + +namespace paddle { + +// init data layer and test layer of both dnn and reference +void MkldnnTester::reset(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize) { + const bool trans = false; + const bool useGpu = false; + + // clear + configs_.clear(); + layerNames_.clear(); + dataLayers_.clear(); + datas_.clear(); + layerMaps_.clear(); + parameters_.clear(); + testLayers_.clear(); + + // resize + configs_.resize(NUM); + layerNames_.resize(NUM); + dataLayers_.resize(NUM); + datas_.resize(NUM); + layerMaps_.resize(NUM); + parameters_.resize(NUM); + testLayers_.resize(NUM); + + // reset configs and layer names + configs_[DNN] = dnn; + configs_[REF] = ref; + layerNames_[DNN] = "mkldnn"; // the first is mkldnn layer + layerNames_[REF] = "reference"; // second is reference layer + + // reset others + for (size_t i = 0; i < NUM; ++i) { + configs_[i].layerConfig.set_name(layerNames_[i]); + initDataLayer(configs_[i], + &(dataLayers_[i]), + &(datas_[i]), + &(layerMaps_[i]), + layerNames_[i], + batchSize, + trans, + useGpu); + initTestLayer( + configs_[i], &(layerMaps_[i]), &(parameters_[i]), &(testLayers_[i])); + } + dnnLayer_ = testLayers_[DNN]; + refLayer_ = testLayers_[REF]; + EXPECT_EQ(dataLayers_[DNN].size(), dataLayers_[REF].size()); + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + + setInputImgSize(); +} + +void MkldnnTester::setInputImgSize() { + for (size_t n = 0; n < dataLayers_.size(); ++n) { + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + // TODO(TJ): fix me when concat and elewise ready + dataLayers_[n][i]->getOutput().setFrameHeight(ih_); + dataLayers_[n][i]->getOutput().setFrameWidth(iw_); + } + } +} + +// init randome parameters of ref, and copy to mkldnn +void MkldnnTester::randomWgtDatas() { + EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); + for (size_t i = 0; i < parameters_[REF].size(); ++i) { + const VectorPtr& dnnValue 
= parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& refValue = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + parameters_[REF][i]->randomize(); + dnnValue->copyFrom(*refValue); + + VLOG(lvl_) << "Random weight data " << parameters_[DNN][i]->getName(); + printVector(dnnValue); + } +} + +// random botdata of ref layer and copy same to mkldnn +void MkldnnTester::randomBotDatas() { + CHECK_EQ(dataLayers_.size(), NUM); + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); + dataLayers_[DNN][i]->getOutputValue()->copyFrom( + *(dataLayers_[REF][i]->getOutputValue())); + VLOG(lvl_) << "Input " << i << " data:"; + printMatrix(dataLayers_[REF][i]->getOutputValue()); + } +} + +void MkldnnTester::randomTopDiffs() { + refLayer_->getOutputGrad()->randomizeUniform(); + dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); + VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; + printMatrix(refLayer_->getOutputGrad()); +} + +void MkldnnTester::checkForward() { + printTopDatas(); + double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), + testLayers_[REF]->getOutputValue()); + VLOG(DNN_TESTS_DETAILS) << "Check Forward"; + EXPECT_LE(fabs(delta), eps_); +} + +void MkldnnTester::checkBackwardData() { + const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; + for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { + const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); + const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); + VLOG(lvl_) << "Mkldnn Backward Output BotDiff " << i; + printMatrix(dnnDiff); + VLOG(lvl_) << "Reference Backward Output BotDiff " << i; + printMatrix(refDiff); + + double delta = compareMatrix(dnnDiff, refDiff); + EXPECT_LE(fabs(delta), eps_); + if (isBN) { + // the other two inputs in batch norm are for moving mean and var + break; + } + } +} + +void MkldnnTester::checkBackwardWgts() { + CHECK_EQ(parameters_[DNN].size(), 
parameters_[REF].size()); + vector dnnWgts; // used to temply save mkldnn weights + saveWgt(parameters_[DNN], dnnWgts); + + // TODO(TJ): cvtWgtToPaddle + for (size_t i = 0; i < parameters_[DNN].size(); ++i) { + const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); + const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); + VLOG(lvl_) << "Mkldnn Output weight " << parameters_[DNN][i]->getName(); + printVector(dnn); + VLOG(lvl_) << "Reference Output weight " << parameters_[REF][i]->getName(); + printVector(ref); + + double delta = compareVector(dnn, ref); + EXPECT_LE(fabs(delta), eps_); + } + + VLOG(DNN_TESTS_DETAILS) << "Restore dnn weights before comapre"; + restoreWgt(dnnWgts, parameters_[DNN]); +} + +void MkldnnTester::saveWgt(const vector& from, + vector& to) { + const bool useGpu = false; + to.resize(from.size()); + for (size_t i = 0; i < to.size(); ++i) { + const VectorPtr& wgt = from[i]->getBuf(PARAMETER_VALUE); + to[i] = Vector::create(wgt->getSize(), useGpu); + to[i]->copyFrom(*wgt); + } +} + +void MkldnnTester::restoreWgt(const vector& from, + vector& to) { + CHECK_EQ(from.size(), to.size()); + for (size_t i = 0; i < from.size(); ++i) { + const VectorPtr& wgt = to[i]->getBuf(PARAMETER_VALUE); + wgt->copyFrom(*from[i]); + } +} + +// clear parameters grad +void MkldnnTester::clearWgtDiffs() { + for (size_t n = 0; n < parameters_.size(); ++n) { + for (size_t i = 0; i < parameters_[n].size(); ++i) { + const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); + if (grad) { + grad->zeroMem(); + } + } + } +} + +void MkldnnTester::clearBotDiffs() { + // dnn and ref + for (size_t n = 0; n < dataLayers_.size(); ++n) { + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } + } +} + +void MkldnnTester::clearBotDiffs(int n) { + CHECK_LT(n, NUM); + // all inputs layers + for (size_t i = 0; i < dataLayers_[n].size(); ++i) { + 
dataLayers_[n][i]->getOutputGrad()->zeroMem(); + } +} + +void MkldnnTester::clearTopDatas() { + for (size_t i = 0; i < testLayers_.size(); ++i) { + testLayers_[i]->getOutputValue()->zeroMem(); + } +} + +void MkldnnTester::printTopDatas() { + if (!log_) { + return; + } + + for (int n = 0; n < NUM; ++n) { + VLOG(lvl_) << testLayers_[n]->getType() << " forward output TopData: "; + printMatrix(testLayers_[n]->getOutputValue()); + } +} + +void MkldnnTester::printMatrix(const MatrixPtr& m) { + if (!log_) { + return; + } +#ifdef _DEBUG + std::ostream str; + m->print(str); + VLOG(lvl_) << str; +#endif +} + +void MkldnnTester::printVector(const VectorPtr& v) { + if (!log_) { + return; + } + + CHECK(v); + CHECK(v->getData()); + const real* pd = v->getData(); + const size_t sz = v->getSize(); + std::stringstream row; + for (size_t i = 0; i < sz; ++i) { + row << pd[i] << ", "; + } + VLOG(lvl_) << row.str(); +} + +double MkldnnTester::getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate, + const float thres) { + double delta = 0, sum = 0; + int failCnt = 0; + const double eps = 1e-5; + double maxOut = 0; + for (size_t i = 0; i < len; ++i) { + double ref = fabs(d2[i]); + double diff = fabs(d1[i] - d2[i]); + delta += diff; + sum += ref; + if (ref > eps && fabs(d1[i]) > eps && diff / ref > thres) { + maxOut = std::max(maxOut, diff / ref); + failCnt++; + } + } + EXPECT_TRUE(std::isnormal(sum)); + EXPECT_FALSE(std::isinf(sum)); + EXPECT_FALSE(std::isnan(delta)); + VLOG(DNN_TESTS_MORE) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; + return (failCnt / (float)len) > failRate ? 
maxOut : delta / sum; +} + +double MkldnnTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { + CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); + return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); +} + +double MkldnnTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { + CHECK_EQ(v1->getSize(), v2->getSize()); + return getDelta(v1->getData(), v2->getData(), v1->getSize()); +} + +void MkldnnTester::runOnce() { + // test forward + randomBotDatas(); + dnnLayer_->forward(PASS_TRAIN); + refLayer_->forward(PASS_TRAIN); + checkForward(); + + // test backward + randomTopDiffs(); + dnnLayer_->backward(nullptr); + refLayer_->backward(nullptr); + checkBackwardData(); + checkBackwardWgts(); + + // clear buffers + // ref code will addto the diff, dnn code will writeto it + clearBotDiffs(REF); + // below two should be coverd by test layers + // clearTopDatas(); + // clearWgtDiffs(); +} + +void MkldnnTester::run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH, + size_t inputImgW, + size_t iter, + float epsilon, + bool log, + int level) { + VLOG(DNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); + ih_ = inputImgH; + iw_ = inputImgW; + iter_ = iter; + eps_ = epsilon; + log_ = log; + lvl_ = level; + + // Firstly always set flag false to initial from paddle weight + TestConfig first = dnn; + // first.layerConfig.set_init_wgt_from_mkldnn(false); + + // reset and run once + reset(first, ref, batchSize); + randomWgtDatas(); + clearWgtDiffs(); + clearBotDiffs(); + + VLOG(DNN_TESTS) << "Check Iteration 0"; + runOnce(); + + // firstly get the flag + bool initWgtFromMkldnn = false; + // dnn.layerConfig.has_init_wgt_from_mkldnn() && + // dnn.layerConfig.init_wgt_from_mkldnn(); + + if (initWgtFromMkldnn) { + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weigths and restart again + vector dnnWgts, refWgts; + 
CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); + + // restart again with flag true + reset(dnn, ref, batchSize); + + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); + + // at least run once + runOnce(); + } + + for (size_t i = 1; i < iter_; ++i) { + VLOG(DNN_TESTS) << "Check Iteration " << i; + runOnce(); + } +} + +} // namespace paddle diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h new file mode 100644 index 0000000000..16b0970a8e --- /dev/null +++ b/paddle/gserver/tests/MkldnnTester.h @@ -0,0 +1,119 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include +#include +#include "LayerGradUtil.h" +#include "paddle/gserver/layers/MkldnnBase.h" + +namespace paddle { + +/** + * @brief test the functionality of Mkldnnlayers + * refer to paddle original function + */ +class MkldnnTester { + enum { + DNN = 0, + REF = 1, + NUM = 2, + }; + +protected: + std::vector configs_; + vector layerNames_; + vector> dataLayers_; + vector> datas_; + vector layerMaps_; + vector> parameters_; + vector testLayers_; + LayerPtr dnnLayer_, refLayer_; + + /// run some iterations, all the result should pass + size_t iter_; + /// whether to print out the details + bool log_; + /// vlog level to print the matrix details datas + int lvl_; + /// epsilon + float eps_; + /// input image size, default 1 + size_t ih_, iw_; + +public: + explicit MkldnnTester(size_t iter = 3, float epsilon = 1e-4) { + iter_ = iter; + eps_ = epsilon; + log_ = false; + lvl_ = DNN_TESTS_MORE; + } + + ~MkldnnTester() {} + +public: + void run(const TestConfig& dnn, + const TestConfig& ref, + size_t batchSize, + size_t inputImgH = 1, + size_t inputImgW = 1, + size_t iter = 3, + float epsilon = 1e-4, + bool log = false, + int level = DNN_TESTS_MORE); + void setLogLevel(int lvl) { lvl_ = lvl; } + +private: + void reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize); + void setInputImgSize(); + void runOnce(); + + void randomWgtDatas(); + void randomBotDatas(); + void randomTopDiffs(); + + void checkForward(); + void checkBackwardData(); + void checkBackwardWgts(); + + void clearWgtDiffs(); + void clearBotDiffs(); + void clearBotDiffs(int n); // clear specific layer + void clearTopDatas(); + + void printTopDatas(); + void printMatrix(const MatrixPtr& m); + void printVector(const VectorPtr& v); + + void saveWgt(const vector& from, vector& to); + void restoreWgt(const vector& from, vector& to); + + double compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2); + double compareVector(const VectorPtr& v1, const VectorPtr& v2); + + /** + 
* Get delta percent + * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the + * max(diff/ref) + * else return sum(abs(a-b)) / sum(abs(b)) should smaller than eps + */ + double getDelta(const real* d1, + const real* d2, + size_t len, + const float failRate = 1e-3, + const float thres = 0.1); +}; + +} // namespace paddle diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp new file mode 100644 index 0000000000..c2c6b701ec --- /dev/null +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include +#include +#include +#include "MkldnnTester.h" +#include "ModelConfig.pb.h" + +using namespace paddle; // NOLINT + +DECLARE_bool(thread_local_rand_use_global_seed); +DECLARE_bool(use_gpu); +DECLARE_bool(use_mkldnn); + +struct testFCDesc { + int bs; + int ic; + int oc; + int ih, iw; // oh == ow == 1 +}; + +void testFcLayer(const testFCDesc& pm) { + const std::string compareTypes[] = {"mkldnn_fc", "fc"}; + TestConfig cfg; + cfg.layerConfig.set_type(compareTypes[0]); + cfg.layerConfig.set_size(pm.oc); + cfg.inputDefs.push_back( + {INPUT_DATA, + "layer_0", + /* size of input layer= */ size_t(pm.ic * pm.ih * pm.iw), + /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); + cfg.layerConfig.add_inputs(); + + MkldnnTester tester; + for (auto biasSize : {pm.oc, 0}) { + cfg.biasSize = biasSize; + TestConfig ref = cfg; + ref.layerConfig.set_type(compareTypes[1]); + for (auto bs : {pm.bs, 1}) { + tester.run(cfg, ref, bs, pm.ih, pm.iw); + } + } +} + +TEST(MkldnnLayer, fcLayer) { + testFcLayer({2, 2, 3, 1, 1}); /* + testFcLayer({16, 32, 64, 1, 1}); + testFcLayer({8, 16, 32, 13, 13}); + testFcLayer({4, 12, 18, 13, 11}); + testFcLayer({2, 64, 32, 16, 16}); + testFcLayer({15, 3, 6, 16, 16});*/ +} + +// TODO(TJ): add branch test + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + FLAGS_use_gpu = false; + FLAGS_use_mkldnn = true; + initMain(argc, argv); + FLAGS_thread_local_rand_use_global_seed = true; + srand(1); + return RUN_ALL_TESTS(); +} From 0c951176bd16ade7b347f1f251e8374dca01a6da Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Mon, 7 Aug 2017 21:13:41 +0800 Subject: [PATCH 11/55] pass mkldnn gtest --- paddle/gserver/layers/MkldnnFcLayer.cpp | 24 ++++++++-- paddle/gserver/layers/MkldnnFcLayer.h | 11 +++-- paddle/gserver/layers/MkldnnLayer.cpp | 62 ++++++++++++++++--------- paddle/gserver/layers/MkldnnLayer.h | 27 ++++++++++- paddle/gserver/tests/MkldnnTester.cpp | 30 +++++------- paddle/gserver/tests/test_Mkldnn.cpp | 12 
++--- 6 files changed, 112 insertions(+), 54 deletions(-) diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index c3b1f83d7d..29b2cc184d 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -42,7 +42,6 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, // create weight weight_ = std::unique_ptr(new Weight(oc_, iLayerSize_, parameters_[0], 0)); - initWgt(); // create biases if (biasParameter_.get() != NULL) { @@ -51,20 +50,36 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } -void MkldnnFcLayer::initWgt() { +void MkldnnFcLayer::cvtWgtFromPaddle() { + if (hasInitedWgt_) { + return; + } + // The weight_ is transposed from initial paddle weight MatrixPtr paddleWgt = Matrix::create( weight_->getW()->getData(), iLayerSize_, oc_, false, false); std::ostringstream ostr; paddleWgt->print(ostr); - VLOG(DNN_BASE) << ostr.str(); + VLOG(DNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); - // Firstly in mkldnn, the matrix is transposed from initial paddle weight + // The mkldnn weight is transposed from initial paddle matrix MatrixPtr paddleWgtT; paddleWgt->transpose(paddleWgtT, true); weight_->getW()->copyFrom(*paddleWgtT); + hasInitedWgt_ = true; +} + +void MkldnnFcLayer::cvtWgtToPaddle() { + MatrixPtr dnnWgt = weight_->getW(); + MatrixPtr paddleWgt; + dnnWgt->transpose(paddleWgt, true); + + // copy paddle weight and override on weight_ + MatrixPtr dnnWgtT = Matrix::create( + dnnWgt->getData(), dnnWgt->getWidth(), dnnWgt->getHeight(), false, false); + dnnWgtT->copyFrom(*paddleWgt); } void MkldnnFcLayer::reshape() { @@ -86,6 +101,7 @@ void MkldnnFcLayer::reshape() { ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; CHECK_EQ(size_t(oc_), getSize()); + printSizeInfo(); // reset output output_.setFrameHeight(oh_); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h 
b/paddle/gserver/layers/MkldnnFcLayer.h index 4cc445e87b..0064fc4727 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -29,25 +29,30 @@ protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw + bool hasInitedWgt_; + // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; public: - explicit MkldnnFcLayer(const LayerConfig& config) : MkldnnLayer(config) {} + explicit MkldnnFcLayer(const LayerConfig& config) + : MkldnnLayer(config), hasInitedWgt_(false) {} ~MkldnnFcLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void initWgt(); + void cvtWgtFromPaddle() override; - void reshape(); + void cvtWgtToPaddle() override; void forward(PassType passType) override; void backward(const UpdateCallback& callback) override; + + void reshape(); }; } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index cead3d87ea..0e1e1c3061 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -25,11 +25,18 @@ namespace paddle { bool MkldnnLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." << "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; + stream_.reset(new MkldnnStream()); + engine_ = CpuEngine::Instance().getEngine(); + // TODO(TJ): deivecId - return Layer::init(layerMap, parameterMap); + return true; } void MkldnnLayer::resetForwardFC(int bs, @@ -42,7 +49,6 @@ void MkldnnLayer::resetForwardFC(int bs, real* wgtData, real* biasData) { bool hasSpatial = ih == 1 && iw == 1 ? false : true; - engine_ = CpuEngine::Instance().getEngine(); mem::desc botMD = hasSpatial ? 
createMD({bs, ic, ih, iw}, format::nchw) : createMD({bs, ic}, format::nc); @@ -52,21 +58,21 @@ void MkldnnLayer::resetForwardFC(int bs, : createMD({}, format::format_undef); mem::desc topMD = createMD({bs, oc}, format::nc); + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); + mkldnn::prop_kind pk = mkldnn::prop_kind::forward; fc_fwd::desc fwdDesc = biasData != NULL ? fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) : fc_fwd::desc(pk, botMD, wgtMD, topMD); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - mem bot = mem(mem::primitive_desc(botMD, engine_), botData); - mem wgt = mem(mem::primitive_desc(wgtMD, engine_), wgtData); - mem top = mem(mem::primitive_desc(topMD, engine_), topData); - if (biasData != NULL) { - mem bias = mem(mem::primitive_desc(biasMD, engine_), biasData); - fwd_.reset(new fc_fwd(fwdPD, bot, wgt, bias, top)); + biasVal_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); } else { - fwd_.reset(new fc_fwd(fwdPD, bot, wgt, top)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); } pipelineFwd_.clear(); pipelineFwd_.push_back(*fwd_); @@ -84,8 +90,12 @@ void MkldnnLayer::mkldnnForwardFC(int bs, // if input size changed, reset it resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); + this->cvtWgtFromPaddle(); + + // update input, since the data might be changed if this is after data layer + inVal_->set_data_handle(botData); + // just forward - // update botdata stream_->submit(pipelineFwd_); } @@ -112,6 +122,10 @@ void MkldnnLayer::resetBackwardFC(int bs, mem::desc biasMD = biasDiff != NULL ? 
createMD({oc}, format::x) : createMD({}, format::format_undef); + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); + outGrad_.reset(new mem(mem::primitive_desc(topMD, engine_), topDiff)); + fc_fwd::desc fwdDesc = fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); @@ -121,15 +135,12 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdWgt::primitive_desc bwdWgtPD = fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - mem botVal = mem(mem::primitive_desc(botMD, engine_), botData); - mem wgtGrad = mem(mem::primitive_desc(wgtMD, engine_), wgtDiff); - mem topGrad = mem(mem::primitive_desc(topMD, engine_), topDiff); - if (biasDiff != NULL) { - mem biasGrad = mem(mem::primitive_desc(biasMD, engine_), biasDiff); - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad, biasGrad)); + biasGrad_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, botVal, topGrad, wgtGrad)); + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); } pipelineBwd_.clear(); pipelineBwd_.push_back(*bwdWgt_); @@ -142,9 +153,9 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - mem botGrad = mem(mem::primitive_desc(botMD, engine_), botDiff); - mem wgtVal = mem(mem::primitive_desc(wgtMD, engine_), wgtData); - bwdData_.reset(new fc_bwdData(bwdDataPD, topGrad, wgtVal, botGrad)); + inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); 
pipelineBwd_.push_back(*bwdData_); } @@ -172,11 +183,18 @@ void MkldnnLayer::mkldnnBackwardFC(int bs, wgtData, biasDiff); - // just forward - // update botdata + // update data + outGrad_->set_data_handle(topDiff); + stream_->submit(pipelineBwd_); } +void MkldnnLayer::printSizeInfo() { + VLOG(DNN_SIZES) << "bs: " << bs_ << ", ic: " << ic_ << ", ih: " << ih_ + << ", iw: " << iw_ << ", oc: " << oc_ << ", oh: " << oh_ + << ", ow: " << ow_; +} + mem::desc MkldnnLayer::createMD(mem::dims dims, mem::format fmt, mem::data_type type) { diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index 5927bd6d52..a9eb9f79da 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -40,13 +40,24 @@ protected: // mkldnn engine, stream and primivtives mkldnn::engine engine_; std::shared_ptr stream_; - std::shared_ptr fwd_; std::shared_ptr bwdWgt_; std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; + // TODO(TJ): change below memory as MkldnnMatrixPtr type + // input == bottom, output == top + // value == data, grad == diff + std::shared_ptr inVal_; + std::shared_ptr inGrad_; + std::shared_ptr outVal_; + std::shared_ptr outGrad_; + std::shared_ptr wgtVal_; + std::shared_ptr wgtGrad_; + std::shared_ptr biasVal_; + std::shared_ptr biasGrad_; + public: explicit MkldnnLayer(const LayerConfig& config) : Layer(config), @@ -67,6 +78,20 @@ public: virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + virtual void printSizeInfo(); + + /** + * convert weight from paddle format to mkldnn format + * weight_ will be override + */ + virtual void cvtWgtFromPaddle() { ; } + + /** + * convert mkldnn weight to paddle format + * weight_ will be override + */ + virtual void cvtWgtToPaddle() { ; } + void resetForwardFC(int bs, int ic, int ih, diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index 38e5bc75be..ecf0f9124d 100644 --- 
a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #include "MkldnnTester.h" #include "paddle/gserver/layers/MkldnnBase.h" +#include "paddle/gserver/layers/MkldnnLayer.h" namespace paddle { @@ -145,7 +146,10 @@ void MkldnnTester::checkBackwardWgts() { vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - // TODO(TJ): cvtWgtToPaddle + const MkldnnLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); + CHECK(dnnlayer); + dnnlayer->cvtWgtToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); @@ -233,11 +237,10 @@ void MkldnnTester::printMatrix(const MatrixPtr& m) { if (!log_) { return; } -#ifdef _DEBUG - std::ostream str; - m->print(str); - VLOG(lvl_) << str; -#endif + + std::ostringstream ostr; + m->print(ostr); + VLOG(lvl_) << std::endl << ostr.str(); } void MkldnnTester::printVector(const VectorPtr& v) { @@ -245,15 +248,9 @@ void MkldnnTester::printVector(const VectorPtr& v) { return; } - CHECK(v); - CHECK(v->getData()); - const real* pd = v->getData(); - const size_t sz = v->getSize(); - std::stringstream row; - for (size_t i = 0; i < sz; ++i) { - row << pd[i] << ", "; - } - VLOG(lvl_) << row.str(); + std::ostringstream ostr; + v->print(ostr, v->getSize()); + VLOG(lvl_) << std::endl << ostr.str(); } double MkldnnTester::getDelta(const real* d1, @@ -335,7 +332,6 @@ void MkldnnTester::run(const TestConfig& dnn, // Firstly always set flag false to initial from paddle weight TestConfig first = dnn; - // first.layerConfig.set_init_wgt_from_mkldnn(false); // reset and run once reset(first, ref, batchSize); @@ -348,8 +344,6 @@ void MkldnnTester::run(const TestConfig& dnn, // firstly get the flag bool initWgtFromMkldnn = false; - // dnn.layerConfig.has_init_wgt_from_mkldnn() && - // 
dnn.layerConfig.init_wgt_from_mkldnn(); if (initWgtFromMkldnn) { // after run once the mkldnn weight has been stored in dnnlayer diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index c2c6b701ec..1d367e6180 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -55,12 +55,12 @@ void testFcLayer(const testFCDesc& pm) { } TEST(MkldnnLayer, fcLayer) { - testFcLayer({2, 2, 3, 1, 1}); /* - testFcLayer({16, 32, 64, 1, 1}); - testFcLayer({8, 16, 32, 13, 13}); - testFcLayer({4, 12, 18, 13, 11}); - testFcLayer({2, 64, 32, 16, 16}); - testFcLayer({15, 3, 6, 16, 16});*/ + testFcLayer({2, 2, 3, 1, 1}); + testFcLayer({3, 7, 19, 1, 1}); + testFcLayer({8, 16, 32, 13, 13}); + testFcLayer({4, 12, 18, 13, 11}); + testFcLayer({2, 64, 32, 16, 16}); + testFcLayer({15, 3, 6, 16, 16}); } // TODO(TJ): add branch test From 7e3747131899685b7b058241576a1e2e96f172ea Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Mon, 7 Aug 2017 14:26:47 -0700 Subject: [PATCH 12/55] modify --- paddle/operators/CMakeLists.txt | 5 +- paddle/operators/{gather_func.h => gather.h} | 0 paddle/operators/gather_func.cc | 19 --- paddle/operators/gather_test.cc | 4 +- paddle/operators/scatter_func.h | 116 ------------------- 5 files changed, 3 insertions(+), 141 deletions(-) rename paddle/operators/{gather_func.h => gather.h} (100%) delete mode 100644 paddle/operators/gather_func.cc delete mode 100644 paddle/operators/scatter_func.h diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 10922892ca..a2284fc8f0 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -41,10 +41,7 @@ function(op_library TARGET) endif() endfunction() -op_library(gather SRCS gather_func.cc) -cc_test(gather_test SRCS gather_test.cc DEPS gather) - -op_library(scatter SRCS scatter_func.cc) +cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(add_op SRCS add_op.cc add_op.cu) cc_test(add_op_test 
SRCS add_op_test.cc DEPS add_op) diff --git a/paddle/operators/gather_func.h b/paddle/operators/gather.h similarity index 100% rename from paddle/operators/gather_func.h rename to paddle/operators/gather.h diff --git a/paddle/operators/gather_func.cc b/paddle/operators/gather_func.cc deleted file mode 100644 index a6b2331f32..0000000000 --- a/paddle/operators/gather_func.cc +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/operators/gather_func.h" -#include -#include "paddle/framework/ddim.h" -#include "paddle/framework/tensor.h" -#include "paddle/platform/place.h" diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index 6f220b133b..5d84b7b5f3 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -12,16 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/operators/gather.h" #include "paddle/framework/ddim.h" #include "paddle/framework/tensor.h" -#include "paddle/operators/gather_func.h" #include "paddle/platform/place.h" #include #include #include -TEST(_abc_, GatherData) { +TEST(Gather, GatherData) { using namespace paddle::framework; using namespace paddle::platform; using namespace paddle::operators; diff --git a/paddle/operators/scatter_func.h b/paddle/operators/scatter_func.h deleted file mode 100644 index 53b260170f..0000000000 --- a/paddle/operators/scatter_func.h +++ /dev/null @@ -1,116 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include -#include "paddle/framework/ddim.h" -#include "paddle/framework/tensor.h" -#include "paddle/platform/place.h" - -/** - * Return a updated tensor from source tensor, scattered according to index: - * dst[i] += src[index[i]] - * input[src]: type-T source Tensor - * input[index]: type-int index Tensor (1-D) - * return: output tensor - */ -template -void ScatterUpdate(Tensor* src, Tensor* dst, Tensor* index) { - // Source shape - auto src_dims = src->dims(); - auto dst_dims = dst->dims(); - DDim output_dims(dims_src); - - // check src shape and dst shape should match - for (size_t i = 1; i < src_dims.size(); i++) - PADDLE_ENFORCE(src_dims[i] == dst_dims[i]); - - int index_size = index->dims()[0]; - - /* slice size */ - int slice_size = 1; - for (size_t i = 0; i < src_dims.size(); ++i) slice_size *= src_dims[i]; - - if (place == CPUPlace()) { - // init - output = new_tensor.mutable_data(output_dims, CPUPlace()); - CPUScatterUpdate( - src->data(), index->data(), slice_size, new_tensor->mutable_data()); - - } else { // GPU - // init - output = new_tensor.mutable_data(output_dims, GPUPlace()); - /* how to specialize device??*/ - GPUScatterUpdate( - d, src->data(), index->data(), slice_size, new_tensor->mutable_data()); - } -} - -/* Implementation of CPU copy */ -template -void CPUScatterUpdate(const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output) { - // const size_t slice_bytes = slice_size * sizeof(T); - - for (size_t i = 0; i < index_size; ++i) { - int index_ = index[i]; - math::vAdd(slice_size, - src + index_ * slice_bytes, - output + i * slice_bytes, - output + i * slice_bytes); - } -} - -/* Implementation of GPU scatter: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); -*/ -template -void GPUScatterUpdate(const GPUDevice& d, - const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output) { - int block_count = 
slice_size * index_size; - int thread_per_block = 1024; - - ScatterOpKernel<<>>( - src, index, output, slice_size, indices_size, slice_size, out_size); -} - -template -__global__ void ScatterOpKernel(const T* params, - const int* indices, - T* out, - int64 indices_size, - int64 slice_size, - int64 out_size) { - /* I suppose we have the following macro, - which I strongly suggest that we should put in cuda: - #define CUDA_1D_KERNEL_LOOP(i, n) \ - for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ - i += blockDim.x * gridDim.x) - */ - CUDA_1D_KERNEL_LOOP(i, out_size) { - int indices_i = i / slice_size; - int slice_i = i - indices_i * slice_size; // offset inside the slice - int scatter_i = indices[indices_i]; - int params_i = scatter_i * slice_size + slice_i; - out[i] += *(params + params_i); - } -} From 6373291c7787c83335cc64d56294756872493301 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 19:34:57 +0800 Subject: [PATCH 13/55] add test case use_mkldnn_wgt --- paddle/gserver/layers/MkldnnBase.h | 2 - paddle/gserver/layers/MkldnnFcLayer.cpp | 4 ++ paddle/gserver/layers/MkldnnLayer.h | 3 ++ paddle/gserver/tests/MkldnnTester.cpp | 60 +++++++++++-------------- paddle/gserver/tests/MkldnnTester.h | 4 +- paddle/gserver/tests/test_Mkldnn.cpp | 1 + paddle/trainer/TrainerConfigHelper.cpp | 2 + paddle/utils/Flags.cpp | 1 + paddle/utils/Flags.h | 1 + python/paddle/trainer/config_parser.py | 5 ++- 10 files changed, 45 insertions(+), 38 deletions(-) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h index eba72e58e5..260dbe45e4 100644 --- a/paddle/gserver/layers/MkldnnBase.h +++ b/paddle/gserver/layers/MkldnnBase.h @@ -23,8 +23,6 @@ typedef enum { DNN_TESTS = 1, DNN_SIZES, DNN_FMTS, - DNN_TESTS_DETAILS, - DNN_TESTS_MORE, DNN_ALL, } DNN_LOG_LEVEL; diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index 29b2cc184d..7e09ed33d2 100644 --- 
a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -51,6 +51,10 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, } void MkldnnFcLayer::cvtWgtFromPaddle() { + if (FLAGS_use_mkldnn_wgt) { + return; + } + if (hasInitedWgt_) { return; } diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index a9eb9f79da..c653eb9985 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -19,6 +19,9 @@ limitations under the License. */ #include "MkldnnBase.h" #include "mkldnn.hpp" +DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); + namespace paddle { class MkldnnLayer; diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index ecf0f9124d..ef99b384a9 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -118,7 +118,7 @@ void MkldnnTester::checkForward() { printTopDatas(); double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), testLayers_[REF]->getOutputValue()); - VLOG(DNN_TESTS_DETAILS) << "Check Forward"; + VLOG(DNN_ALL) << "Check Forward"; EXPECT_LE(fabs(delta), eps_); } @@ -162,7 +162,7 @@ void MkldnnTester::checkBackwardWgts() { EXPECT_LE(fabs(delta), eps_); } - VLOG(DNN_TESTS_DETAILS) << "Restore dnn weights before comapre"; + VLOG(DNN_ALL) << "Restore dnn weights before comapre"; restoreWgt(dnnWgts, parameters_[DNN]); } @@ -275,8 +275,8 @@ double MkldnnTester::getDelta(const real* d1, EXPECT_TRUE(std::isnormal(sum)); EXPECT_FALSE(std::isinf(sum)); EXPECT_FALSE(std::isnan(delta)); - VLOG(DNN_TESTS_MORE) << "reference avg data: " << sum / len - << ", delta: " << delta / sum << ", failCnt:" << failCnt; + VLOG(DNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; return (failCnt / (float)len) > failRate ? 
maxOut : delta / sum; } @@ -330,43 +330,37 @@ void MkldnnTester::run(const TestConfig& dnn, log_ = log; lvl_ = level; - // Firstly always set flag false to initial from paddle weight - TestConfig first = dnn; - + // Firstly test FLAGS_use_mkldnn_wgt = false + FLAGS_use_mkldnn_wgt = false; // reset and run once - reset(first, ref, batchSize); + reset(dnn, ref, batchSize); randomWgtDatas(); clearWgtDiffs(); clearBotDiffs(); + for (size_t i = 0; i < iter_; ++i) { + VLOG(DNN_TESTS) << "Check Iteration " << i; + runOnce(); + } - VLOG(DNN_TESTS) << "Check Iteration 0"; - runOnce(); - - // firstly get the flag - bool initWgtFromMkldnn = false; - - if (initWgtFromMkldnn) { - // after run once the mkldnn weight has been stored in dnnlayer - // then save the weigths and restart again - vector dnnWgts, refWgts; - CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); - saveWgt(parameters_[DNN], dnnWgts); - saveWgt(parameters_[REF], refWgts); - - // restart again with flag true - reset(dnn, ref, batchSize); + // Then test FLAGS_use_mkldnn_wgt = true + FLAGS_use_mkldnn_wgt = true; + // after run once the mkldnn weight has been stored in dnnlayer + // then save the weigths and restart again + vector dnnWgts, refWgts; + CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); + saveWgt(parameters_[DNN], dnnWgts); + saveWgt(parameters_[REF], refWgts); - // restore wgt - restoreWgt(dnnWgts, parameters_[DNN]); - restoreWgt(refWgts, parameters_[REF]); - clearWgtDiffs(); - clearBotDiffs(); + // restart again with flag true + reset(dnn, ref, batchSize); - // at least run once - runOnce(); - } + // restore wgt + restoreWgt(dnnWgts, parameters_[DNN]); + restoreWgt(refWgts, parameters_[REF]); + clearWgtDiffs(); + clearBotDiffs(); - for (size_t i = 1; i < iter_; ++i) { + for (size_t i = 0; i < iter_; ++i) { VLOG(DNN_TESTS) << "Check Iteration " << i; runOnce(); } diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h index 16b0970a8e..8b3049b5c2 
100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MkldnnTester.h @@ -58,7 +58,7 @@ public: iter_ = iter; eps_ = epsilon; log_ = false; - lvl_ = DNN_TESTS_MORE; + lvl_ = DNN_ALL; } ~MkldnnTester() {} @@ -72,7 +72,7 @@ public: size_t iter = 3, float epsilon = 1e-4, bool log = false, - int level = DNN_TESTS_MORE); + int level = DNN_ALL); void setLogLevel(int lvl) { lvl_ = lvl; } private: diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index 1d367e6180..0516a059de 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -23,6 +23,7 @@ using namespace paddle; // NOLINT DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(use_gpu); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); struct testFCDesc { int bs; diff --git a/paddle/trainer/TrainerConfigHelper.cpp b/paddle/trainer/TrainerConfigHelper.cpp index a0a365aa0b..eba40862b9 100644 --- a/paddle/trainer/TrainerConfigHelper.cpp +++ b/paddle/trainer/TrainerConfigHelper.cpp @@ -29,6 +29,7 @@ DECLARE_bool(with_gpu); DECLARE_bool(parallel_nn); DECLARE_string(config_args); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); const char *kConfigParserModuleName = "paddle.trainer.config_parser"; const char *kConfigParserFuncName = "parse_config_and_serialize"; @@ -46,6 +47,7 @@ TrainerConfigHelper::TrainerConfigHelper(const std::string &configFilePath) << ",with_cost=" << FLAGS_with_cost << ",use_gpu=" << FLAGS_use_gpu << ",parallel_nn=" << FLAGS_parallel_nn << ",use_mkldnn=" << FLAGS_use_mkldnn + << ",use_mkldnn_wgt=" << FLAGS_use_mkldnn_wgt << ",cudnn_version=" << hl_get_cudnn_lib_version(); if (!FLAGS_config_args.empty()) { configArgs << "," << FLAGS_config_args; diff --git a/paddle/utils/Flags.cpp b/paddle/utils/Flags.cpp index ab1c181c62..600c83a848 100644 --- a/paddle/utils/Flags.cpp +++ b/paddle/utils/Flags.cpp @@ -27,6 +27,7 @@ DEFINE_bool(use_mkldnn, false, "Default still keep use CPU 
training"); DEFINE_bool(use_mkldnn, false, "Only support CPU training"); #endif +DEFINE_bool(use_mkldnn_wgt, false, "Init weight from CPU weight"); DEFINE_bool(parallel_nn, false, "Whether to use multi-threads to calculate one neural network." diff --git a/paddle/utils/Flags.h b/paddle/utils/Flags.h index 1832bb515e..0aca4c0ee0 100644 --- a/paddle/utils/Flags.h +++ b/paddle/utils/Flags.h @@ -41,3 +41,4 @@ DECLARE_string(predict_file); DECLARE_bool(prev_batch_state); DECLARE_string(init_model_path); DECLARE_bool(use_mkldnn); +DECLARE_bool(use_mkldnn_wgt); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index ae39abc081..dd79f3a043 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1619,6 +1619,8 @@ class FCLayer(LayerBase): config_assert( len(inputs) == 1, "MkldnnFCLayer support one and only one input!") + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) super(FCLayer, self).__init__( name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): @@ -1627,9 +1629,10 @@ class FCLayer(LayerBase): format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" if use_mkldnn: - dims = [self.config.size, input_layer.size] config_assert(not sparse, "MkldnnFCLayer do not support sparse format yet") + if use_mkldnn and use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] else: dims = [input_layer.size, self.config.size] if sparse: From e18fbd82082096227bc3f8c51fc7b2a11c2f2707 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 20:07:38 +0800 Subject: [PATCH 14/55] skip reset mkldnn when input size does not change --- paddle/gserver/layers/MkldnnLayer.cpp | 30 +++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index 0e1e1c3061..c909fe274d 100644 --- 
a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -49,7 +49,6 @@ void MkldnnLayer::resetForwardFC(int bs, real* wgtData, real* biasData) { bool hasSpatial = ih == 1 && iw == 1 ? false : true; - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) : createMD({bs, ic}, format::nc); mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) @@ -58,7 +57,12 @@ void MkldnnLayer::resetForwardFC(int bs, : createMD({}, format::format_undef); mem::desc topMD = createMD({bs, oc}, format::nc); - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + mem::primitive_desc botPD = mem::primitive_desc(botMD, engine_); + if (inVal_ && inVal_->get_primitive_desc() == botPD) { + return; + } + + inVal_.reset(new mem(botPD, botData)); wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); @@ -111,7 +115,6 @@ void MkldnnLayer::resetBackwardFC(int bs, real* wgtData, real* biasDiff) { bool hasSpatial = ih == 1 && iw == 1 ? false : true; - engine_ = CpuEngine::Instance().getEngine(); // backward weight mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) @@ -122,9 +125,19 @@ void MkldnnLayer::resetBackwardFC(int bs, mem::desc biasMD = biasDiff != NULL ? 
createMD({oc}, format::x) : createMD({}, format::format_undef); - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + mem::primitive_desc topPD = mem::primitive_desc(botMD, engine_); + if (outGrad_ && outGrad_->get_primitive_desc() == topPD) { + return; + } + + if (inVal_) { + // update data + inVal_->set_data_handle(botData); + } else { + inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); + } wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); - outGrad_.reset(new mem(mem::primitive_desc(topMD, engine_), topDiff)); + outGrad_.reset(new mem(topPD, topDiff)); fc_fwd::desc fwdDesc = fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); @@ -154,7 +167,12 @@ void MkldnnLayer::resetBackwardFC(int bs, fc_bwdData::primitive_desc bwdDataPD = fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + if (wgtVal_) { + // update data + wgtVal_->set_data_handle(wgtData); + } else { + wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); + } bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); pipelineBwd_.push_back(*bwdData_); } From 6159f5db14b580fab0386fdbe258b26c892be257 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Tue, 8 Aug 2017 16:38:51 -0700 Subject: [PATCH 15/55] code style fix --- paddle/operators/gather.h | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 5adc1e6b17..8b02156545 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -28,11 +28,8 @@ namespace operators { /* Implementation of CPU copy */ template -void CPUGather(const T* params, - const int* indices, - const int slice_size, - const int index_size, - T* output) { +void CPUGather(const T* params, const int* indices, const int slice_size, + const 
int index_size, T* output) { const size_t slice_bytes = slice_size * sizeof(T); for (size_t i = 0; i < index_size; ++i) { @@ -47,11 +44,8 @@ void CPUGather(const T* params, d = cuda_stream(gpu_id_, stream_id_); */ template -void GPUGather(const T* src, - const int* index, - const int slice_size, - const int index_size, - T* output); +void GPUGather(const T* src, const int* index, const int slice_size, + const int index_size, T* output); /** * Return a new tensor from source tensor, gathered according to index @@ -60,8 +54,7 @@ void GPUGather(const T* src, * return: output tensor */ template -void Gather(const platform::Place& place, - const paddle::framework::Tensor* src, +void Gather(const platform::Place& place, const paddle::framework::Tensor* src, const paddle::framework::Tensor* index, paddle::framework::Tensor* output) { // check index of shape 1-D @@ -78,10 +71,7 @@ void Gather(const platform::Place& place, // Gathering if (platform::is_cpu_place(place)) { - CPUGather(src->data(), - index->data(), - slice_size, - index_size, + CPUGather(src->data(), index->data(), slice_size, index_size, output->data()); } else { // init for GPU From f6a940936b5f44ebf99a9925991158fdd3beaffd Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Tue, 8 Aug 2017 21:22:15 +0800 Subject: [PATCH 16/55] remove unused comments, refine and rename --- paddle/gserver/layers/MkldnnFcLayer.cpp | 4 ++-- paddle/gserver/layers/MkldnnFcLayer.h | 4 ++-- paddle/gserver/layers/MkldnnLayer.cpp | 9 ++++----- paddle/gserver/layers/MkldnnLayer.h | 4 ++-- paddle/gserver/tests/MkldnnTester.cpp | 2 +- python/paddle/trainer/config_parser.py | 4 ++-- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index 7e09ed33d2..e4c4d4675d 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -50,7 +50,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } -void 
MkldnnFcLayer::cvtWgtFromPaddle() { +void MkldnnFcLayer::convertWeightsFromPaddle() { if (FLAGS_use_mkldnn_wgt) { return; } @@ -75,7 +75,7 @@ void MkldnnFcLayer::cvtWgtFromPaddle() { hasInitedWgt_ = true; } -void MkldnnFcLayer::cvtWgtToPaddle() { +void MkldnnFcLayer::convertWeightsToPaddle() { MatrixPtr dnnWgt = weight_->getW(); MatrixPtr paddleWgt; dnnWgt->transpose(paddleWgt, true); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index 0064fc4727..f891052284 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -44,9 +44,9 @@ public: bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - void cvtWgtFromPaddle() override; + void convertWeightsFromPaddle() override; - void cvtWgtToPaddle() override; + void convertWeightsToPaddle() override; void forward(PassType passType) override; diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp index c909fe274d..6bd2b15a17 100644 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ b/paddle/gserver/layers/MkldnnLayer.cpp @@ -14,7 +14,6 @@ limitations under the License. 
*/ #include "MkldnnLayer.h" -// using namespace mkldnn; // NOLINT using mem = mkldnn::memory; // NOLINT typedef mem::format format; typedef mkldnn::inner_product_forward fc_fwd; @@ -94,7 +93,7 @@ void MkldnnLayer::mkldnnForwardFC(int bs, // if input size changed, reset it resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); - this->cvtWgtFromPaddle(); + this->convertWeightsFromPaddle(); // update input, since the data might be changed if this is after data layer inVal_->set_data_handle(botData); @@ -208,9 +207,9 @@ void MkldnnLayer::mkldnnBackwardFC(int bs, } void MkldnnLayer::printSizeInfo() { - VLOG(DNN_SIZES) << "bs: " << bs_ << ", ic: " << ic_ << ", ih: " << ih_ - << ", iw: " << iw_ << ", oc: " << oc_ << ", oh: " << oh_ - << ", ow: " << ow_; + VLOG(DNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; } mem::desc MkldnnLayer::createMD(mem::dims dims, diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index c653eb9985..e5c93500c7 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -87,13 +87,13 @@ public: * convert weight from paddle format to mkldnn format * weight_ will be override */ - virtual void cvtWgtFromPaddle() { ; } + virtual void convertWeightsFromPaddle() {} /** * convert mkldnn weight to paddle format * weight_ will be override */ - virtual void cvtWgtToPaddle() { ; } + virtual void convertWeightsToPaddle() {} void resetForwardFC(int bs, int ic, diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index ef99b384a9..59b3861df8 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -149,7 +149,7 @@ void MkldnnTester::checkBackwardWgts() { const MkldnnLayerPtr dnnlayer = std::dynamic_pointer_cast(dnnLayer_); CHECK(dnnlayer); - dnnlayer->cvtWgtToPaddle(); + 
dnnlayer->convertWeightsToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { const VectorPtr& dnn = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); const VectorPtr& ref = parameters_[REF][i]->getBuf(PARAMETER_VALUE); diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index dc07af343d..3213df5186 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1614,13 +1614,13 @@ class FCLayer(LayerBase): error_clipping_threshold=None, **xargs): use_mkldnn = bool(int(g_command_config_args.get("use_mkldnn", 0))) + use_mkldnn_wgt = bool( + int(g_command_config_args.get("use_mkldnn_wgt", 0))) if use_mkldnn: self.layer_type = 'mkldnn_fc' config_assert( len(inputs) == 1, "MkldnnFCLayer support one and only one input!") - use_mkldnn_wgt = bool( - int(g_command_config_args.get("use_mkldnn_wgt", 0))) super(FCLayer, self).__init__( name, self.layer_type, size, inputs=inputs, **xargs) for input_index in xrange(len(self.inputs)): From e2ccbccb02132cef59373bb8ec52ddbbf3c7c61d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Wed, 9 Aug 2017 19:49:37 +0800 Subject: [PATCH 17/55] support python test without installation python package --- cmake/generic.cmake | 2 +- python/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf6..9f907a9dc2 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PADDLE_PYTHON_LIB_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e7..fc8c6f6a42 100644 --- a/python/CMakeLists.txt +++ 
b/python/CMakeLists.txt @@ -1,6 +1,8 @@ set(OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/build") +set(PADDLE_PYTHON_LIB_DIR "${OUTPUT_DIR}/lib") + file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) file(GLOB UTILS_PY_FILES . ./paddle/utils/*.py) From 5e5c441245276a2696ac1f840ebd261c7c14cfd4 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 9 Aug 2017 20:16:16 +0800 Subject: [PATCH 18/55] Enable Python Unit Test before make install --- cmake/generic.cmake | 2 +- paddle/framework/CMakeLists.txt | 5 +++++ python/paddle/v2/framework/.gitignore | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 python/paddle/v2/framework/.gitignore diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 957c20bcf6..2778b49128 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${CMAKE_SOURCE_DIR}/python:${CMAKE_SOURCE_DIR}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/paddle/framework/CMakeLists.txt b/paddle/framework/CMakeLists.txt index 6601918c90..b7b61b597f 100644 --- a/paddle/framework/CMakeLists.txt +++ b/paddle/framework/CMakeLists.txt @@ -35,6 +35,11 @@ py_proto_compile(framework_py_proto SRCS attribute.proto op_proto.proto op_desc. # Generate an empty __init__.py to make framework_py_proto as a valid python module. 
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(framework_py_proto framework_py_proto_init) +add_custom_command(TARGET framework_py_proto POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJ_ROOT}/python/paddle/v2/framework/proto + COMMAND cp *.py ${PROJ_ROOT}/python/paddle/v2/framework/proto/ + COMMENT "Copy generated python proto into directory paddle/v2/framework/proto." + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) cc_library(backward SRCS backward.cc DEPS net_op) cc_test(backward_test SRCS backward_test.cc DEPS backward) diff --git a/python/paddle/v2/framework/.gitignore b/python/paddle/v2/framework/.gitignore new file mode 100644 index 0000000000..2ff540d576 --- /dev/null +++ b/python/paddle/v2/framework/.gitignore @@ -0,0 +1 @@ +proto From 8f464a58984f8024afadab2920acf2b9c4a60d17 Mon Sep 17 00:00:00 2001 From: liaogang Date: Wed, 9 Aug 2017 20:20:42 +0800 Subject: [PATCH 19/55] update PROJ_ROOT --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 2778b49128..6b0524021c 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${CMAKE_SOURCE_DIR}/python:${CMAKE_SOURCE_DIR}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} + COMMAND env PYTHONPATH=${PROJ_ROOT}/python:${PROJ_ROOT}/paddle:${PADDLE_PYTHON_PACKAGE_DIR} python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() From 32e756ca033c6e32ba2f711e90dc22f54b874361 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Wed, 9 Aug 2017 20:45:36 +0800 Subject: [PATCH 20/55] fix test_KmaxSeqScore for only CPU compile. 
--- paddle/gserver/tests/test_KmaxSeqScore.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp index a51fe390c7..30aadae712 100644 --- a/paddle/gserver/tests/test_KmaxSeqScore.cpp +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -96,6 +96,11 @@ TEST(Layer, kmaxSeqScoreLayer) { MatrixPtr inValue = Matrix::create(subSeqStartPosition.back(), 1, false, false); + std::vector mode = {false}; +#ifndef PADDLE_ONLY_CPU + model.push_back(true); +#endif + for (auto hasSubseq : {false, true}) { vector> groundTruth; inValue->randomizeUniform(); @@ -104,7 +109,7 @@ TEST(Layer, kmaxSeqScoreLayer) { hasSubseq ? subSeqStartPosition : seqStartPosition, beamSize); - for (auto useGpu : {false, true}) { + for (auto useGpu : mode) { TestConfig config; config.layerConfig.set_type("kmax_seq_score"); config.layerConfig.set_beam_size(beamSize); From c7f245892eab275d9c60e3005ec8030168a0936d Mon Sep 17 00:00:00 2001 From: hedaoyuan Date: Wed, 9 Aug 2017 21:23:02 +0800 Subject: [PATCH 21/55] Fix some warning. --- paddle/math/CpuSparseMatrix.h | 4 ++++ paddle/math/SparseMatrix.h | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index 9676f8864f..6ba795d5b7 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -302,6 +302,10 @@ public: bool isSparse() const { return true; } private: + using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index f8d9ffc29f..8cd6b71f8f 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -14,8 +14,8 @@ limitations under the License. 
*/ #pragma once #include -#include "Matrix.h" #include "CpuSparseMatrix.h" +#include "Matrix.h" namespace paddle { @@ -234,6 +234,9 @@ public: private: using Matrix::mul; using Matrix::copyFrom; + using Matrix::rowMax; + using Matrix::print; + using Matrix::subMatrix; }; } // namespace paddle From b2bd67133aa609225ea46d12d1f091340ab000e4 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Wed, 9 Aug 2017 22:52:47 +0800 Subject: [PATCH 22/55] rename and refine functions --- paddle/gserver/layers/MkldnnBase.h | 16 +- paddle/gserver/layers/MkldnnFcLayer.cpp | 167 ++++++++++++++---- paddle/gserver/layers/MkldnnFcLayer.h | 21 ++- paddle/gserver/layers/MkldnnLayer.cpp | 222 ------------------------ paddle/gserver/layers/MkldnnLayer.h | 78 ++++----- paddle/gserver/tests/MkldnnTester.cpp | 22 ++- paddle/gserver/tests/MkldnnTester.h | 4 +- paddle/gserver/tests/test_Mkldnn.cpp | 13 +- python/paddle/trainer/config_parser.py | 7 +- 9 files changed, 217 insertions(+), 333 deletions(-) delete mode 100644 paddle/gserver/layers/MkldnnLayer.cpp diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MkldnnBase.h index 260dbe45e4..63fd67a850 100644 --- a/paddle/gserver/layers/MkldnnBase.h +++ b/paddle/gserver/layers/MkldnnBase.h @@ -19,12 +19,12 @@ limitations under the License. */ namespace paddle { typedef enum { - DNN_BASE = 1, - DNN_TESTS = 1, - DNN_SIZES, - DNN_FMTS, - DNN_ALL, -} DNN_LOG_LEVEL; + MKLDNN_BASE = 1, // basical info of MKLDNN + MKLDNN_TESTS = 1, // gtest info of MKLDNN + MKLDNN_SIZES = 2, // size info of MKLDNN + MKLDNN_FMTS = 3, // format info of MKLDNN + MKLDNN_ALL = 4, // show all info of MKLDNN +} MKLDNN_LOG_LEVEL; /** * @brief MKLDNN CPU engine. 
@@ -68,7 +68,7 @@ public: /** * @brief Submit stream * @param prims The primitives vector - * block Waiting for the stream to complete + * @param block Waiting for the stream to complete */ void submit(std::vector& prims, bool block = true) { resetState(); @@ -84,8 +84,8 @@ public: return; } // TODO(TJ): change me when mkldnn have method to reset this state - stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); // stream_.reset(new mkldnn::stream(mkldnn::stream::kind::lazy)); + stream_.reset(new mkldnn::stream(mkldnn::stream::kind::eager)); ready_ = true; } diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MkldnnFcLayer.cpp index e4c4d4675d..f89db169ef 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MkldnnFcLayer.cpp @@ -16,6 +16,12 @@ limitations under the License. */ #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" +using namespace mkldnn; // NOLINT +typedef memory::format format; +typedef inner_product_forward fc_fwd; +typedef inner_product_backward_weights fc_bwdWgt; +typedef inner_product_backward_data fc_bwdData; + namespace paddle { REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); @@ -26,7 +32,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return false; } - CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet!"; + CHECK_EQ(inputLayers_.size(), 1) << "Only support one input layer yet"; CHECK_EQ(inputLayers_.size(), parameters_.size()); CHECK(!parameters_[0]->isSparse()) << "Do not support sparse yet"; @@ -63,14 +69,14 @@ void MkldnnFcLayer::convertWeightsFromPaddle() { MatrixPtr paddleWgt = Matrix::create( weight_->getW()->getData(), iLayerSize_, oc_, false, false); + // TODO(TJ): remove this print when do not need differ weights std::ostringstream ostr; paddleWgt->print(ostr); - VLOG(DNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); + VLOG(MKLDNN_ALL) << "Initial Weight from paddle: " << std::endl << ostr.str(); // The mkldnn weight 
is transposed from initial paddle matrix MatrixPtr paddleWgtT; paddleWgt->transpose(paddleWgtT, true); - weight_->getW()->copyFrom(*paddleWgtT); hasInitedWgt_ = true; } @@ -101,6 +107,10 @@ void MkldnnFcLayer::reshape() { if (iw_ == 0) { iw_ = 1; } + hasSpatial_ = true; + if (ih_ == 1 && iw_ == 1) { + hasSpatial_ = false; + } CHECK_EQ(iLayerSize_, inputLayers_[0]->getSize()); ic_ = iLayerSize_ / (ih_ * iw_); CHECK_EQ(size_t(ic_ * ih_ * iw_), iLayerSize_) << "not divisible"; @@ -111,6 +121,114 @@ void MkldnnFcLayer::reshape() { output_.setFrameHeight(oh_); output_.setFrameWidth(ow_); resetOutput(bs_, oc_); + + // reset mkldnn forward + resetFwd(); + needResetBwd_ = true; + + convertWeightsFromPaddle(); +} + +void MkldnnFcLayer::resetFwd() { + bool hasBias = biases_ && biases_->getW(); + real* iData = getInputValue(0)->getData(); + real* oData = getOutputValue()->getData(); + real* wData = weight_->getW()->getData(); + real* bData = hasBias ? biases_->getW()->getData() : NULL; + + // TODO(TJ): below create should be covered in MkldnnMatrix + // create memory desc + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc bMD = bData != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + + // create memory primitive desc and memory self + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + wgtVal_.reset(new memory(memory::primitive_desc(wMD, engine_), wData)); + outVal_.reset(new memory(memory::primitive_desc(oMD, engine_), oData)); + + prop_kind pk = prop_kind::forward; + fc_fwd::desc fwdDesc = bData != NULL ? 
fc_fwd::desc(pk, iMD, wMD, bMD, oMD) + : fc_fwd::desc(pk, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + + if (bData != NULL) { + biasVal_.reset(new memory(memory::primitive_desc(bMD, engine_), bData)); + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); + } else { + fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); + } + pipelineFwd_.clear(); + pipelineFwd_.push_back(*fwd_); +} + +void MkldnnFcLayer::resetBwd() { + if (!needResetBwd_) { + return; + } + needResetBwd_ = false; + + bool hasBias = biases_ && biases_->getWGrad(); + real* iData = getInputValue(0)->getData(); + real* iDiff = getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; + real* oDiff = getOutputGrad()->getData(); + real* wDiff = weight_->getWGrad()->getData(); + real* bDiff = hasBias ? biases_->getWGrad()->getData() : NULL; + + /// backward weight + // create memory desc for backward memory + memory::desc iMD = hasSpatial_ ? createMD({bs_, ic_, ih_, iw_}, format::nchw) + : createMD({bs_, ic_}, format::nc); + memory::desc wMD = hasSpatial_ ? createMD({oc_, ic_, ih_, iw_}, format::oihw) + : createMD({oc_, ic_}, format::oi); + memory::desc oMD = createMD({bs_, oc_}, format::nc); + memory::desc bMD = bDiff != NULL ? createMD({oc_}, format::x) + : createMD({}, format::format_undef); + + if (inVal_) { + // update data + inVal_->set_data_handle(iData); + } else { + inVal_.reset(new memory(memory::primitive_desc(iMD, engine_), iData)); + } + + // create memory primitive desc and memory self + wgtGrad_.reset(new memory(memory::primitive_desc(wMD, engine_), wDiff)); + outGrad_.reset(new memory(memory::primitive_desc(oMD, engine_), oDiff)); + + fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, iMD, wMD, oMD); + fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); + fc_bwdWgt::desc bwdWgtDesc = bDiff != NULL + ? 
fc_bwdWgt::desc(iMD, wMD, bMD, oMD) + : fc_bwdWgt::desc(iMD, wMD, oMD); + fc_bwdWgt::primitive_desc bwdWgtPD = + fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); + + if (bDiff != NULL) { + biasGrad_.reset(new memory(memory::primitive_desc(bMD, engine_), bDiff)); + bwdWgt_.reset( + new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); + } else { + bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); + } + pipelineBwd_.clear(); + pipelineBwd_.push_back(*bwdWgt_); + + /// backward data + if (iDiff == NULL) { + return; + } + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(iMD, wMD, oMD); + fc_bwdData::primitive_desc bwdDataPD = + fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + inGrad_.reset(new memory(memory::primitive_desc(iMD, engine_), iDiff)); + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); + pipelineBwd_.push_back(*bwdData_); } void MkldnnFcLayer::forward(PassType passType) { @@ -119,12 +237,14 @@ void MkldnnFcLayer::forward(PassType passType) { { REGISTER_TIMER_INFO("mkldnn_FwdTimer", getName().c_str()); - real* input = getInputValue(0)->getData(); - real* output = getOutputValue()->getData(); - real* wgt = weight_->getW()->getData(); - bool hasBias = biases_ && biases_->getW(); - real* bias = hasBias ? 
biases_->getW()->getData() : NULL; - mkldnnForwardFC(bs_, ic_, ih_, iw_, input, oc_, output, wgt, bias); + + // update input data + // since it might be changed if this is after data layer + real* iData = getInputValue(0)->getData(); + inVal_->set_data_handle(iData); + + // just submit forward pipeline + stream_->submit(pipelineFwd_); } /* activation */ { @@ -139,33 +259,22 @@ void MkldnnFcLayer::backward(const UpdateCallback& callback) { backwardActivation(); } - bool hasBias = biases_ && biases_->getWGrad(); { REGISTER_TIMER_INFO("mkldnn_bwdTimer", getName().c_str()); - real* inVal = getInputValue(0)->getData(); - real* inGrad = - getInputGrad(0) != nullptr ? getInputGrad(0)->getData() : NULL; - real* outGrad = getOutputGrad()->getData(); - real* wgtGrad = weight_->getWGrad()->getData(); - real* wgtVal = weight_->getW()->getData(); - real* biasGrad = hasBias ? biases_->getWGrad()->getData() : NULL; - mkldnnBackwardFC(bs_, - ic_, - ih_, - iw_, - inGrad, - inVal, - oc_, - outGrad, - wgtGrad, - wgtVal, - biasGrad); + resetBwd(); + + // update diff + real* oDiff = getOutputGrad()->getData(); + outGrad_->set_data_handle(oDiff); + + // just sumbmit backward pipeline + stream_->submit(pipelineBwd_); } { REGISTER_TIMER_INFO("WeightUpdate", getName().c_str()); weight_->getParameterPtr()->incUpdate(callback); - if (hasBias) { + if (biases_ && biases_->getWGrad()) { biases_->getParameterPtr()->incUpdate(callback); } } diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MkldnnFcLayer.h index f891052284..c4c0fa1c41 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MkldnnFcLayer.h @@ -30,6 +30,7 @@ protected: size_t iLayerSize_; // == ic * ih * iw bool hasInitedWgt_; + bool hasSpatial_; // fc weight and bias std::unique_ptr weight_; @@ -37,7 +38,7 @@ protected: public: explicit MkldnnFcLayer(const LayerConfig& config) - : MkldnnLayer(config), hasInitedWgt_(false) {} + : MkldnnLayer(config), hasInitedWgt_(false), 
hasSpatial_(true) {} ~MkldnnFcLayer() {} @@ -52,7 +53,25 @@ public: void backward(const UpdateCallback& callback) override; +protected: + /** + * reshape the input image sizes + * and reset output buffer size + * and reset mkldnn forward + */ void reshape(); + + /** + * reset the forward primitve and memory + * only would be called when input size changes + */ + void resetFwd(); + + /** + * reset the backward primitve and memory for mkldnn fc + * only would be called when needed + */ + void resetBwd(); }; } // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.cpp b/paddle/gserver/layers/MkldnnLayer.cpp deleted file mode 100644 index 6bd2b15a17..0000000000 --- a/paddle/gserver/layers/MkldnnLayer.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "MkldnnLayer.h" - -using mem = mkldnn::memory; // NOLINT -typedef mem::format format; -typedef mkldnn::inner_product_forward fc_fwd; -typedef mkldnn::inner_product_backward_weights fc_bwdWgt; -typedef mkldnn::inner_product_backward_data fc_bwdData; - -namespace paddle { - -bool MkldnnLayer::init(const LayerMap& layerMap, - const ParameterMap& parameterMap) { - if (!Layer::init(layerMap, parameterMap)) { - return false; - } - - CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
- << "Please set WITH_MKLDNN=ON " - << "and set use_mkldnn=True"; - stream_.reset(new MkldnnStream()); - engine_ = CpuEngine::Instance().getEngine(); - - // TODO(TJ): deivecId - return true; -} - -void MkldnnLayer::resetForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData) { - bool hasSpatial = ih == 1 && iw == 1 ? false : true; - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) - : createMD({bs, ic}, format::nc); - mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) - : createMD({oc, ic}, format::oi); - mem::desc biasMD = biasData != NULL ? createMD({oc}, format::x) - : createMD({}, format::format_undef); - mem::desc topMD = createMD({bs, oc}, format::nc); - - mem::primitive_desc botPD = mem::primitive_desc(botMD, engine_); - if (inVal_ && inVal_->get_primitive_desc() == botPD) { - return; - } - - inVal_.reset(new mem(botPD, botData)); - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); - outVal_.reset(new mem(mem::primitive_desc(topMD, engine_), topData)); - - mkldnn::prop_kind pk = mkldnn::prop_kind::forward; - fc_fwd::desc fwdDesc = biasData != NULL - ? 
fc_fwd::desc(pk, botMD, wgtMD, biasMD, topMD) - : fc_fwd::desc(pk, botMD, wgtMD, topMD); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - - if (biasData != NULL) { - biasVal_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasData)); - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *biasVal_, *outVal_)); - } else { - fwd_.reset(new fc_fwd(fwdPD, *inVal_, *wgtVal_, *outVal_)); - } - pipelineFwd_.clear(); - pipelineFwd_.push_back(*fwd_); -} - -void MkldnnLayer::mkldnnForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData) { - // if input size changed, reset it - resetForwardFC(bs, ic, ih, iw, botData, oc, topData, wgtData, biasData); - - this->convertWeightsFromPaddle(); - - // update input, since the data might be changed if this is after data layer - inVal_->set_data_handle(botData); - - // just forward - stream_->submit(pipelineFwd_); -} - -void MkldnnLayer::resetBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff) { - bool hasSpatial = ih == 1 && iw == 1 ? false : true; - - // backward weight - mem::desc botMD = hasSpatial ? createMD({bs, ic, ih, iw}, format::nchw) - : createMD({bs, ic}, format::nc); - mem::desc wgtMD = hasSpatial ? createMD({oc, ic, ih, iw}, format::oihw) - : createMD({oc, ic}, format::oi); - mem::desc topMD = createMD({bs, oc}, format::nc); - mem::desc biasMD = biasDiff != NULL ? 
createMD({oc}, format::x) - : createMD({}, format::format_undef); - - mem::primitive_desc topPD = mem::primitive_desc(botMD, engine_); - if (outGrad_ && outGrad_->get_primitive_desc() == topPD) { - return; - } - - if (inVal_) { - // update data - inVal_->set_data_handle(botData); - } else { - inVal_.reset(new mem(mem::primitive_desc(botMD, engine_), botData)); - } - wgtGrad_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtDiff)); - outGrad_.reset(new mem(topPD, topDiff)); - - fc_fwd::desc fwdDesc = - fc_fwd::desc(mkldnn::prop_kind::forward, botMD, wgtMD, topMD); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = - biasDiff != NULL ? fc_bwdWgt::desc(botMD, wgtMD, biasMD, topMD) - : fc_bwdWgt::desc(botMD, wgtMD, topMD); - fc_bwdWgt::primitive_desc bwdWgtPD = - fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - - if (biasDiff != NULL) { - biasGrad_.reset(new mem(mem::primitive_desc(biasMD, engine_), biasDiff)); - bwdWgt_.reset( - new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_, *biasGrad_)); - } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *outGrad_, *wgtGrad_)); - } - pipelineBwd_.clear(); - pipelineBwd_.push_back(*bwdWgt_); - - // backward data - if (botDiff == NULL) { - return; - } - - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc(botMD, wgtMD, topMD); - fc_bwdData::primitive_desc bwdDataPD = - fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); - inGrad_.reset(new mem(mem::primitive_desc(botMD, engine_), botDiff)); - if (wgtVal_) { - // update data - wgtVal_->set_data_handle(wgtData); - } else { - wgtVal_.reset(new mem(mem::primitive_desc(wgtMD, engine_), wgtData)); - } - bwdData_.reset(new fc_bwdData(bwdDataPD, *outGrad_, *wgtVal_, *inGrad_)); - pipelineBwd_.push_back(*bwdData_); -} - -void MkldnnLayer::mkldnnBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff) { - 
// if input size changed, reset it - resetBackwardFC(bs, - ic, - ih, - iw, - botDiff, - botData, - oc, - topDiff, - wgtDiff, - wgtData, - biasDiff); - - // update data - outGrad_->set_data_handle(topDiff); - - stream_->submit(pipelineBwd_); -} - -void MkldnnLayer::printSizeInfo() { - VLOG(DNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ - << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ - << ", oh: " << oh_ << ", ow: " << ow_; -} - -mem::desc MkldnnLayer::createMD(mem::dims dims, - mem::format fmt, - mem::data_type type) { - // TODO(TJ): isFmtSuppoted(fmt) - return mem::desc(dims, type, fmt); -} - -} // namespace paddle diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MkldnnLayer.h index e5c93500c7..620bdfc984 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MkldnnLayer.h @@ -40,6 +40,9 @@ protected: // output image channel, height and width int oc_, oh_, ow_; + // backward also need reset after reset forward handle + bool needResetBwd_; + // mkldnn engine, stream and primivtives mkldnn::engine engine_; std::shared_ptr stream_; @@ -50,8 +53,6 @@ protected: std::vector pipelineBwd_; // TODO(TJ): change below memory as MkldnnMatrixPtr type - // input == bottom, output == top - // value == data, grad == diff std::shared_ptr inVal_; std::shared_ptr inGrad_; std::shared_ptr outVal_; @@ -71,6 +72,7 @@ public: oc_(0), oh_(0), ow_(0), + needResetBwd_(true), engine_(mkldnn::engine::cpu, 0), stream_(nullptr), fwd_(nullptr), @@ -79,9 +81,21 @@ public: ~MkldnnLayer() {} - virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap); + virtual bool init(const LayerMap& layerMap, + const ParameterMap& parameterMap) { + if (!Layer::init(layerMap, parameterMap)) { + return false; + } + + CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
+ << "Please set WITH_MKLDNN=ON " + << "and set use_mkldnn=True"; + stream_.reset(new MkldnnStream()); + engine_ = CpuEngine::Instance().getEngine(); - virtual void printSizeInfo(); + // TODO(TJ): deivecId + return true; + } /** * convert weight from paddle format to mkldnn format @@ -95,56 +109,24 @@ public: */ virtual void convertWeightsToPaddle() {} - void resetForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData); - - void mkldnnForwardFC(int bs, - int ic, - int ih, - int iw, - real* botData, - int oc, - real* topData, - real* wgtData, - real* biasData); - - void resetBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff); - - void mkldnnBackwardFC(int bs, - int ic, - int ih, - int iw, - real* botDiff, - real* botData, - int oc, - real* topDiff, - real* wgtDiff, - real* wgtData, - real* biasDiff); + /** + * print info about sizes + */ + virtual void printSizeInfo() { + VLOG(MKLDNN_SIZES) << getName() << ": bs: " << bs_ << ", ic: " << ic_ + << ", ih: " << ih_ << ", iw: " << iw_ << ", oc: " << oc_ + << ", oh: " << oh_ << ", ow: " << ow_; + } // TODO(TJ): move to MkldnnMatrix // create memory desc inline mkldnn::memory::desc createMD( mkldnn::memory::dims dims, mkldnn::memory::format fmt, - mkldnn::memory::data_type type = mkldnn::memory::data_type::f32); + mkldnn::memory::data_type type = mkldnn::memory::data_type::f32) { + // TODO(TJ): isFmtSuppoted(fmt) + return mkldnn::memory::desc(dims, type, fmt); + } }; } // namespace paddle diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MkldnnTester.cpp index 59b3861df8..9232e2fdcd 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MkldnnTester.cpp @@ -118,7 +118,7 @@ void MkldnnTester::checkForward() { printTopDatas(); double delta = compareMatrix(testLayers_[DNN]->getOutputValue(), 
testLayers_[REF]->getOutputValue()); - VLOG(DNN_ALL) << "Check Forward"; + VLOG(MKLDNN_ALL) << "Check Forward"; EXPECT_LE(fabs(delta), eps_); } @@ -162,7 +162,7 @@ void MkldnnTester::checkBackwardWgts() { EXPECT_LE(fabs(delta), eps_); } - VLOG(DNN_ALL) << "Restore dnn weights before comapre"; + VLOG(MKLDNN_ALL) << "Restore dnn weights before comapre"; restoreWgt(dnnWgts, parameters_[DNN]); } @@ -275,8 +275,8 @@ double MkldnnTester::getDelta(const real* d1, EXPECT_TRUE(std::isnormal(sum)); EXPECT_FALSE(std::isinf(sum)); EXPECT_FALSE(std::isnan(delta)); - VLOG(DNN_ALL) << "reference avg data: " << sum / len - << ", delta: " << delta / sum << ", failCnt:" << failCnt; + VLOG(MKLDNN_ALL) << "reference avg data: " << sum / len + << ", delta: " << delta / sum << ", failCnt:" << failCnt; return (failCnt / (float)len) > failRate ? maxOut : delta / sum; } @@ -306,10 +306,8 @@ void MkldnnTester::runOnce() { // clear buffers // ref code will addto the diff, dnn code will writeto it + // and clearTopDatas() and clearWgtDiffs() should be coverd by test layers clearBotDiffs(REF); - // below two should be coverd by test layers - // clearTopDatas(); - // clearWgtDiffs(); } void MkldnnTester::run(const TestConfig& dnn, @@ -321,8 +319,8 @@ void MkldnnTester::run(const TestConfig& dnn, float epsilon, bool log, int level) { - VLOG(DNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() - << " vs " << ref.layerConfig.type(); + VLOG(MKLDNN_TESTS) << "Test MKLDNN functionality: " << dnn.layerConfig.type() + << " vs " << ref.layerConfig.type(); ih_ = inputImgH; iw_ = inputImgW; iter_ = iter; @@ -338,14 +336,14 @@ void MkldnnTester::run(const TestConfig& dnn, clearWgtDiffs(); clearBotDiffs(); for (size_t i = 0; i < iter_; ++i) { - VLOG(DNN_TESTS) << "Check Iteration " << i; + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; runOnce(); } // Then test FLAGS_use_mkldnn_wgt = true FLAGS_use_mkldnn_wgt = true; // after run once the mkldnn weight has been stored in dnnlayer - // then 
save the weigths and restart again + // then save the weights and restart again vector dnnWgts, refWgts; CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); saveWgt(parameters_[DNN], dnnWgts); @@ -361,7 +359,7 @@ void MkldnnTester::run(const TestConfig& dnn, clearBotDiffs(); for (size_t i = 0; i < iter_; ++i) { - VLOG(DNN_TESTS) << "Check Iteration " << i; + VLOG(MKLDNN_TESTS) << "Check Iteration " << i; runOnce(); } } diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MkldnnTester.h index 8b3049b5c2..7d1db870d1 100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MkldnnTester.h @@ -58,7 +58,7 @@ public: iter_ = iter; eps_ = epsilon; log_ = false; - lvl_ = DNN_ALL; + lvl_ = MKLDNN_ALL; } ~MkldnnTester() {} @@ -72,7 +72,7 @@ public: size_t iter = 3, float epsilon = 1e-4, bool log = false, - int level = DNN_ALL); + int level = MKLDNN_ALL); void setLogLevel(int lvl) { lvl_ = lvl; } private: diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_Mkldnn.cpp index 0516a059de..8e4a8595d3 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_Mkldnn.cpp @@ -23,7 +23,6 @@ using namespace paddle; // NOLINT DECLARE_bool(thread_local_rand_use_global_seed); DECLARE_bool(use_gpu); DECLARE_bool(use_mkldnn); -DECLARE_bool(use_mkldnn_wgt); struct testFCDesc { int bs; @@ -56,12 +55,12 @@ void testFcLayer(const testFCDesc& pm) { } TEST(MkldnnLayer, fcLayer) { - testFcLayer({2, 2, 3, 1, 1}); - testFcLayer({3, 7, 19, 1, 1}); - testFcLayer({8, 16, 32, 13, 13}); - testFcLayer({4, 12, 18, 13, 11}); - testFcLayer({2, 64, 32, 16, 16}); - testFcLayer({15, 3, 6, 16, 16}); + testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); + testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); + testFcLayer({/*bs*/ 4, /*ic*/ 12, /*oc*/ 18, /*ih*/ 13, /*iw*/ 11}); + testFcLayer({/*bs*/ 2, /*ic*/ 64, /*oc*/ 32, 
/*ih*/ 16, /*iw*/ 16}); + testFcLayer({/*bs*/ 15, /*ic*/ 3, /*oc*/ 6, /*ih*/ 16, /*iw*/ 16}); } // TODO(TJ): add branch test diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 3213df5186..da99e5bd53 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -1626,15 +1626,14 @@ class FCLayer(LayerBase): for input_index in xrange(len(self.inputs)): input_layer = self.get_input_layer(input_index) psize = self.config.size * input_layer.size + dims = [input_layer.size, self.config.size] format = self.inputs[input_index].format sparse = format == "csr" or format == "csc" if use_mkldnn: config_assert(not sparse, "MkldnnFCLayer do not support sparse format yet") - if use_mkldnn and use_mkldnn_wgt: - dims = [self.config.size, input_layer.size] - else: - dims = [input_layer.size, self.config.size] + if use_mkldnn_wgt: + dims = [self.config.size, input_layer.size] if sparse: psize = self.inputs[input_index].nnz else: From d4e4cebf5f95c0edd1788d81780491cd90e18236 Mon Sep 17 00:00:00 2001 From: zchen0211 Date: Wed, 9 Aug 2017 11:42:40 -0700 Subject: [PATCH 23/55] fix all coding-style problems --- paddle/operators/gather.h | 20 +++----------------- paddle/operators/gather_test.cc | 2 -- 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/paddle/operators/gather.h b/paddle/operators/gather.h index 8b02156545..0c73717d38 100644 --- a/paddle/operators/gather.h +++ b/paddle/operators/gather.h @@ -20,13 +20,10 @@ limitations under the License. 
*/ #include "paddle/framework/tensor.h" #include "paddle/platform/place.h" -using paddle::framework::Tensor; -using paddle::framework::DDim; - namespace paddle { namespace operators { -/* Implementation of CPU copy */ +// Implementation of CPU copy template void CPUGather(const T* params, const int* indices, const int slice_size, const int index_size, T* output) { @@ -34,15 +31,11 @@ void CPUGather(const T* params, const int* indices, const int slice_size, for (size_t i = 0; i < index_size; ++i) { int index_ = indices[i]; - // copy src[index_] to output[i] memcpy(output + i * slice_size, params + index_ * slice_size, slice_bytes); } } -/* Implementation of GPU copy: - I suppose the GPUDevice& d, contains gpu_id and thread_id - d = cuda_stream(gpu_id_, stream_id_); -*/ +// Implementation of GPU copy: template void GPUGather(const T* src, const int* index, const int slice_size, const int index_size, T* output); @@ -62,7 +55,7 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, int index_size = index->dims()[0]; auto src_dims = src->dims(); - DDim output_dims(src_dims); + paddle::framework::DDim output_dims(src_dims); output_dims[0] = index_size; // slice size @@ -73,13 +66,6 @@ void Gather(const platform::Place& place, const paddle::framework::Tensor* src, if (platform::is_cpu_place(place)) { CPUGather(src->data(), index->data(), slice_size, index_size, output->data()); - } else { - // init for GPU - // output_arr = output->mutable_data(output_dims, platform::GPUPlace()); - // how to specialize device?? 
- // GPUGather( - // d, src->data(), index->data(), slice_size, - // new_tensor->mutable_data()); } } diff --git a/paddle/operators/gather_test.cc b/paddle/operators/gather_test.cc index 5d84b7b5f3..5de748ec46 100644 --- a/paddle/operators/gather_test.cc +++ b/paddle/operators/gather_test.cc @@ -29,7 +29,6 @@ TEST(Gather, GatherData) { Tensor* src = new Tensor(); Tensor* index = new Tensor(); Tensor* output = new Tensor(); - // src.Resize(make_ddim({3, 4})); int* p_src = nullptr; int* p_index = nullptr; @@ -40,7 +39,6 @@ TEST(Gather, GatherData) { p_index[0] = 1; p_index[1] = 0; - // gather int* p_output = output->mutable_data(make_ddim({2, 4}), CPUPlace()); Gather(CPUPlace(), src, index, output); From aaddf5f6940768b827f03305e86da557ab24db65 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 08:25:22 +0800 Subject: [PATCH 24/55] test on CI --- paddle/scripts/docker/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 44442be472..f70583c641 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -74,11 +74,11 @@ cat < Date: Thu, 10 Aug 2017 08:52:10 +0800 Subject: [PATCH 25/55] add python unit test dependencies --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 8ac123bf9c..c7fbe12c1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile + pip install rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 From 6cd1617129620c88c84dcfe55f1e21e1882ef7e0 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 
09:20:16 +0800 Subject: [PATCH 26/55] add pip install opencv-python --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c7fbe12c1b..5d4de6e30d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' + pip install rarfile opencv-python 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 From 7d977e885ee22da42bd38731b90786fbc594f6eb Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 09:22:49 +0800 Subject: [PATCH 27/55] add pip install opencv-python --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c7fbe12c1b..5d4de6e30d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \ pip install -U sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' + pip install rarfile opencv-python 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 From ca6c29635682ce1ebf4d42d7f9f0b94c2c88f6f4 Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 09:24:17 +0800 Subject: [PATCH 28/55] update --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 5d4de6e30d..0d0c88f40c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -64,7 +64,7 @@ RUN pip install --upgrade pip && \ pip install -U 
sphinx-rtd-theme==0.1.9 recommonmark && \ pip install pre-commit 'requests==2.9.2' 'ipython==5.3.0' && \ pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \ - pip install rarfile opencv-python 'scipy>=0.19.0' 'nltk>=3.2.2' + pip install opencv-python rarfile 'scipy>=0.19.0' 'nltk>=3.2.2' # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2 From 024243fee0c0feb6ab7a57ae7aff9acaf9fdffe7 Mon Sep 17 00:00:00 2001 From: dangqingqing Date: Thu, 10 Aug 2017 11:14:41 +0800 Subject: [PATCH 29/55] Fix typo error. --- paddle/gserver/tests/test_KmaxSeqScore.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/gserver/tests/test_KmaxSeqScore.cpp b/paddle/gserver/tests/test_KmaxSeqScore.cpp index 30aadae712..308abe6816 100644 --- a/paddle/gserver/tests/test_KmaxSeqScore.cpp +++ b/paddle/gserver/tests/test_KmaxSeqScore.cpp @@ -98,7 +98,7 @@ TEST(Layer, kmaxSeqScoreLayer) { std::vector mode = {false}; #ifndef PADDLE_ONLY_CPU - model.push_back(true); + mode.push_back(true); #endif for (auto hasSubseq : {false, true}) { From 7a56d46a8a1040773c3d4e27bc111124eae95bae Mon Sep 17 00:00:00 2001 From: liaogang Date: Thu, 10 Aug 2017 11:21:03 +0800 Subject: [PATCH 30/55] Rename PROJ_ROOT to PADDLE_SOURCE_DIR and PROJ_BINARY_ROOT to PADDLE_BINARY_DIR --- CMakeLists.txt | 8 ++-- cmake/configure.cmake | 2 +- cmake/cpplint.cmake | 2 +- cmake/package.cmake | 2 +- cmake/util.cmake | 4 +- cmake/version.cmake | 2 +- doc/templates/conf.py.cn.in | 6 +-- doc/templates/conf.py.en.in | 6 +-- paddle/api/CMakeLists.txt | 14 +++--- paddle/capi/tests/CMakeLists.txt | 4 +- paddle/gserver/tests/CMakeLists.txt | 24 +++++----- paddle/math/CMakeLists.txt | 8 ++-- paddle/pserver/test/CMakeLists.txt | 6 +-- paddle/trainer/tests/CMakeLists.txt | 48 +++++++++---------- paddle/utils/tests/CMakeLists.txt | 2 +- proto/CMakeLists.txt | 4 +- python/CMakeLists.txt | 8 ++-- .../tests/CMakeLists.txt | 
14 +++--- python/setup.py.in | 14 +++--- 19 files changed, 89 insertions(+), 89 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..72a9165431 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,8 +14,8 @@ cmake_minimum_required(VERSION 3.0) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") -set(PROJ_ROOT ${CMAKE_CURRENT_SOURCE_DIR}) -set(PROJ_BINARY_ROOT ${CMAKE_CURRENT_BINARY_DIR}) +set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) include(system) @@ -121,8 +121,8 @@ include(version) # set PADDLE_VERSION include(coveralls) # set code coverage -include_directories("${PROJ_ROOT}") -include_directories("${PROJ_ROOT}/paddle/cuda/include") +include_directories("${PADDLE_SOURCE_DIR}") +include_directories("${PADDLE_SOURCE_DIR}/paddle/cuda/include") include_directories("${CMAKE_CURRENT_BINARY_DIR}/proto") include_directories("${CMAKE_CURRENT_BINARY_DIR}/go/pserver/client/c") include_directories(${Boost_INCLUDE_DIRS}) diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 2ac0989546..209f9078a6 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -129,7 +129,7 @@ if(WITH_GOLANG) add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/glide COMMAND env GOPATH=${GOPATH} ${GLIDE} install COMMAND touch ${CMAKE_BINARY_DIR}/glide - DEPENDS ${PROJ_ROOT}/go/glide.lock + DEPENDS ${PADDLE_SOURCE_DIR}/go/glide.lock WORKING_DIRECTORY "${PADDLE_IN_GOPATH}/go" ) diff --git a/cmake/cpplint.cmake b/cmake/cpplint.cmake index 5184f0815f..8d5d533126 100644 --- a/cmake/cpplint.cmake +++ b/cmake/cpplint.cmake @@ -52,7 +52,7 @@ macro(add_style_check_target TARGET_NAME) if(SOURCES_LIST) add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND "${PYTHON_EXECUTABLE}" "${PROJ_ROOT}/paddle/scripts/cpplint.py" + COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/scripts/cpplint.py" "--filter=${STYLE_FILTER}" ${SOURCES_LIST} COMMENT "cpplint: Checking source code 
style" diff --git a/cmake/package.cmake b/cmake/package.cmake index ff49a2d08e..79e02147f3 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,7 +12,7 @@ set(CPACK_PACKAGE_DESCRIPTION "") set(CPACK_DEBIAN_PACKAGE_DEPENDS "libpython2.7-dev, libstdc++6, python-pip, curl, libgfortran3, python-pip-whl") set(CPACK_DEBIAN_PACKAGE_SECTION Devel) set(CPACK_DEBIAN_PACKAGE_VERSION ${PADDLE_VERSION}) -set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PROJ_ROOT}/paddle/scripts/deb/postinst") +set(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${PADDLE_SOURCE_DIR}/paddle/scripts/deb/postinst") #set(CPACK_GENERATOR "DEB") # Start cpack include (CMakePackageConfigHelpers) diff --git a/cmake/util.cmake b/cmake/util.cmake index 4a27623b7f..0da4969d31 100644 --- a/cmake/util.cmake +++ b/cmake/util.cmake @@ -141,8 +141,8 @@ endmacro() function(create_resources res_file output_file) add_custom_command( OUTPUT ${output_file} - COMMAND python ARGS ${PROJ_ROOT}/cmake/make_resource.py ${res_file} ${output_file} - DEPENDS ${res_file} ${PROJ_ROOT}/cmake/make_resource.py) + COMMAND python ARGS ${PADDLE_SOURCE_DIR}/cmake/make_resource.py ${res_file} ${output_file} + DEPENDS ${res_file} ${PADDLE_SOURCE_DIR}/cmake/make_resource.py) endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index ac1583a24c..cde650128a 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -4,7 +4,7 @@ set(tmp_version "HEAD") while ("${PADDLE_VERSION}" STREQUAL "") execute_process( COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version} - WORKING_DIRECTORY ${PROJ_ROOT} + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR} OUTPUT_VARIABLE GIT_TAG_NAME RESULT_VARIABLE GIT_RESULT ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/doc/templates/conf.py.cn.in b/doc/templates/conf.py.cn.in index 673948dfe7..41b35b5b23 100644 --- a/doc/templates/conf.py.cn.in +++ b/doc/templates/conf.py.cn.in @@ -13,7 +13,7 @@ # serve to show the default. 
import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -24,7 +24,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/doc/templates/conf.py.en.in b/doc/templates/conf.py.en.in index b6b50b7dcd..5822c2481d 100644 --- a/doc/templates/conf.py.en.in +++ b/doc/templates/conf.py.en.in @@ -13,7 +13,7 @@ # serve to show the default. import sys import os, subprocess -sys.path.insert(0, os.path.abspath('@PROJ_ROOT@/python')) +sys.path.insert(0, os.path.abspath('@PADDLE_SOURCE_DIR@/python')) import shlex from recommonmark import parser, transform import paddle @@ -25,7 +25,7 @@ AutoStructify = transform.AutoStructify # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
-templates_path = ["@PROJ_ROOT@/doc_theme/templates"] +templates_path = ["@PADDLE_SOURCE_DIR@/doc_theme/templates"] # -- General configuration ------------------------------------------------ @@ -120,7 +120,7 @@ html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['@PROJ_ROOT@/doc_theme/static'] +html_static_path = ['@PADDLE_SOURCE_DIR@/doc_theme/static'] # Output file base name for HTML help builder. htmlhelp_basename = project + 'doc' diff --git a/paddle/api/CMakeLists.txt b/paddle/api/CMakeLists.txt index 7a1e8b8b26..d7b3d2bdec 100644 --- a/paddle/api/CMakeLists.txt +++ b/paddle/api/CMakeLists.txt @@ -19,9 +19,9 @@ add_library(paddle_api STATIC ${API_SOURCES}) add_dependencies(paddle_api paddle_proto paddle_trainer_lib) INCLUDE(${SWIG_USE_FILE}) -INCLUDE_DIRECTORIES(${PROJ_ROOT}/paddle) +INCLUDE_DIRECTORIES(${PADDLE_SOURCE_DIR}/paddle) -FILE(GLOB PY_PADDLE_PYTHON_FILES ${PROJ_ROOT}/paddle/py_paddle/*.py) +FILE(GLOB PY_PADDLE_PYTHON_FILES ${PADDLE_SOURCE_DIR}/paddle/py_paddle/*.py) SET_SOURCE_FILES_PROPERTIES(Paddle.i PROPERTIES CPLUSPLUS ON) @@ -79,16 +79,16 @@ SWIG_LINK_LIBRARIES(swig_paddle ${START_END} ) -add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PROJ_ROOT}/paddle/py_paddle - COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PROJ_ROOT}/paddle/py_paddle +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/swig_paddle.py ${PADDLE_SOURCE_DIR}/paddle/py_paddle + COMMAND cp ${CMAKE_CURRENT_BINARY_DIR}/_swig_paddle.so ${PADDLE_SOURCE_DIR}/paddle/py_paddle COMMAND ${CMAKE_COMMAND} -E touch .timestamp - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + WORKING_DIRECTORY 
${PADDLE_SOURCE_DIR}/paddle DEPENDS _swig_paddle ) # TODO(yuyang18) : make wheel name calculated by cmake -add_custom_target(python_api_wheel ALL DEPENDS ${PROJ_ROOT}/paddle/py_paddle/_swig_paddle.so) +add_custom_target(python_api_wheel ALL DEPENDS ${PADDLE_SOURCE_DIR}/paddle/py_paddle/_swig_paddle.so) if(WITH_TESTING) IF(NOT PY_PIP_FOUND) diff --git a/paddle/capi/tests/CMakeLists.txt b/paddle/capi/tests/CMakeLists.txt index d73f6b7733..8208808b94 100644 --- a/paddle/capi/tests/CMakeLists.txt +++ b/paddle/capi/tests/CMakeLists.txt @@ -10,5 +10,5 @@ target_include_directories(capi_test_gradientMachine PUBLIC ${PADDLE_CAPI_INC_PATH}) target_link_libraries(capi_test_gradientMachine paddle_capi) add_test(NAME capi_test_gradientMachine - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/capi/tests) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/capi_test_gradientMachine + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/capi/tests) diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index 209d0ab9c8..294d5f115d 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -9,7 +9,7 @@ add_unittest_without_exec(test_ProtoDataProvider # mkdir will get error. 
add_test(NAME test_ProtoDataProvider COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) ################# test_LayerGrad ####################### add_unittest_without_exec(test_LayerGrad @@ -92,8 +92,8 @@ if(WITH_PYTHON) test_PyDataProvider.cpp) add_test(NAME test_PyDataProvider - COMMAND .set_python_path.sh -d ./gserver/tests:${PROJ_ROOT}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ./gserver/tests:${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentLayer ####################### @@ -106,7 +106,7 @@ if(NOT WITH_DOUBLE) add_test(NAME test_WarpCTCLayer COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_WarpCTCLayer --warpctc_dir=${WARPCTC_LIB_DIR} - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() ############### test_RecurrentGradientMachine ############### @@ -116,20 +116,20 @@ add_unittest_without_exec(test_RecurrentGradientMachine test_RecurrentGradientMachine.cpp) add_test(NAME test_RecurrentGradientMachine COMMAND .set_python_path.sh -d - ${PROJ_ROOT}/python:${PROJ_ROOT}/paddle/gserver/tests + ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests ${CMAKE_CURRENT_BINARY_DIR}/test_RecurrentGradientMachine - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) add_unittest_without_exec(test_NetworkCompare test_NetworkCompare.cpp) if(WITH_GPU) add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=true + WORKING_DIRECTORY 
${PADDLE_SOURCE_DIR}/paddle) else() add_test(NAME test_NetworkCompare - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false - WORKING_DIRECTORY ${PROJ_ROOT}/paddle) + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_NetworkCompare --use_gpu=false + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle) endif() @@ -137,6 +137,6 @@ add_unittest_without_exec(test_PyDataProvider2 test_PyDataProvider2.cpp) add_test(NAME test_PyDataProvider2 - COMMAND .set_python_path.sh -d ${PROJ_ROOT}/paddle/gserver/tests:${PROJ_ROOT}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle + COMMAND .set_python_path.sh -d ${PADDLE_SOURCE_DIR}/paddle/gserver/tests:${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProvider2 + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle ) diff --git a/paddle/math/CMakeLists.txt b/paddle/math/CMakeLists.txt index 9981de6160..bf28092e82 100644 --- a/paddle/math/CMakeLists.txt +++ b/paddle/math/CMakeLists.txt @@ -15,13 +15,13 @@ file(GLOB MATH_HEADERS . *.h) file(GLOB MATH_SOURCES . 
*.cpp) set(MATH_SOURCES - "${PROJ_ROOT}/paddle/math/BaseMatrix.cu" - "${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu" + "${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu" ${MATH_SOURCES}) if(NOT WITH_GPU) # then compile BaseMatrix.cu as c++ file - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/BaseMatrix.cu") - compile_cu_as_cpp("${PROJ_ROOT}/paddle/math/TrainingAlgorithmOp.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/BaseMatrix.cu") + compile_cu_as_cpp("${PADDLE_SOURCE_DIR}/paddle/math/TrainingAlgorithmOp.cu") add_library(paddle_math STATIC ${MATH_SOURCES}) else() diff --git a/paddle/pserver/test/CMakeLists.txt b/paddle/pserver/test/CMakeLists.txt index 6e8f9c37f6..b66a00ba06 100644 --- a/paddle/pserver/test/CMakeLists.txt +++ b/paddle/pserver/test/CMakeLists.txt @@ -3,7 +3,7 @@ add_unittest_without_exec(socket_test SocketTest.cpp) add_test(NAME socket_test - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/socket_test --loop_time=10) ####################### test_ProtoServer #################### @@ -12,7 +12,7 @@ add_unittest_without_exec(test_ProtoServer IF(NOT ON_TRAVIS) add_test(NAME test_ProtoServer - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_ProtoServer) ENDIF(NOT ON_TRAVIS) @@ -24,5 +24,5 @@ ENDIF(NOT ON_TRAVIS) add_unittest_without_exec(test_ParameterServer2 test_ParameterServer2.cpp) add_test(NAME test_ParameterServer2 - COMMAND ${PROJ_ROOT}/paddle/.set_port.sh -p port -n 4 + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port -n 4 ${CMAKE_CURRENT_BINARY_DIR}/test_ParameterServer2) diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt index 08b2d8a38e..f01ad4142d 100644 --- a/paddle/trainer/tests/CMakeLists.txt +++ b/paddle/trainer/tests/CMakeLists.txt @@ -2,19 +2,19 
@@ add_unittest_without_exec(test_Compare test_Compare.cpp) add_test(NAME test_Compare - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python ${CMAKE_CURRENT_BINARY_DIR}/test_Compare - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_Trainer ########################### add_unittest_without_exec(test_Trainer test_Trainer.cpp) add_test(NAME test_Trainer - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/gen_proto_data.py && - ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/gen_proto_data.py && + ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_Trainer - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_TrainerOnePass ########################## if(WITH_PYTHON) @@ -23,60 +23,60 @@ if(WITH_PYTHON) add_unittest_without_exec(test_TrainerOnePass test_TrainerOnePass.cpp) add_test(NAME test_TrainerOnePass - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests - ${PROJ_ROOT}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests + ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################ test_CompareTwoNets ###################### add_unittest_without_exec(test_CompareTwoNets 
test_CompareTwoNets.cpp) add_test(NAME test_CompareTwoNets - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ############### test_CompareTwoOpts ################### add_unittest_without_exec(test_CompareTwoOpts test_CompareTwoOpts.cpp) add_test(NAME test_CompareTwoOpts - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoOpts --config_file_a=trainer/tests/sample_trainer_config_opt_a.conf --config_file_b=trainer/tests/sample_trainer_config_opt_b.conf --num_passes=1 --need_high_accuracy=0 - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) ################# test_CompareSparse ################## add_unittest_without_exec(test_CompareSparse test_CompareSparse.cpp) if(NOT ON_TRAVIS) add_test(NAME test_CompareSparse - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ ./.set_port.sh -p port -n 6 ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) endif() ################# test_recurrent_machine_generation ############### add_unittest_without_exec(test_recurrent_machine_generation test_recurrent_machine_generation.cpp) add_test(NAME test_recurrent_machine_generation - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d 
${PADDLE_SOURCE_DIR}/python/ ${CMAKE_CURRENT_BINARY_DIR}/test_recurrent_machine_generation - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_PyDataProviderWrapper ######################### add_unittest_without_exec(test_PyDataProviderWrapper test_PyDataProviderWrapper.cpp) add_test(NAME test_PyDataProviderWrapper - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d - ${PROJ_ROOT}/python/:${PROJ_ROOT}/paddle/trainer/tests + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d + ${PADDLE_SOURCE_DIR}/python/:${PADDLE_SOURCE_DIR}/paddle/trainer/tests ${CMAKE_CURRENT_BINARY_DIR}/test_PyDataProviderWrapper - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) #################### test_config_parser ######################### add_test(NAME test_config_parser - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/paddle/trainer/tests/config_parser_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/paddle/) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/paddle/trainer/tests/config_parser_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/) diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index aa923b3553..c770ce1698 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -13,6 +13,6 @@ add_executable( link_paddle_exe(test_CustomStackTracePrint) if(NOT APPLE) add_test(NAME test_CustomStackTracePrint - COMMAND ${PROJ_ROOT}/paddle/utils/tests/test_CustomStackTracePrint.sh + COMMAND ${PADDLE_SOURCE_DIR}/paddle/utils/tests/test_CustomStackTracePrint.sh WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endif() diff --git a/proto/CMakeLists.txt b/proto/CMakeLists.txt index e1cea8bd0d..6212c2e60a 100644 --- a/proto/CMakeLists.txt +++ b/proto/CMakeLists.txt @@ -9,13 +9,13 @@ 
foreach(filename ${proto_filenames}) get_filename_component(ABS_FIL ${filename} ABSOLUTE) get_filename_component(FIL_WE ${filename} NAME_WE) set(CUR_PROTO_GEN_PY - ${PROJ_ROOT}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) + ${PADDLE_SOURCE_DIR}/paddle/python/paddle/proto/${FIL_WE}_pb2.py) set(PROTO_GEN_PY ${CUR_PROTO_GEN_PY} ${PROTO_GEN_PY}) add_custom_command(OUTPUT ${CUR_PROTO_GEN_PY} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS "--python_out=${PROJ_ROOT}/python/paddle/proto" + ARGS "--python_out=${PADDLE_SOURCE_DIR}/python/paddle/proto" "-I" ${CMAKE_CURRENT_SOURCE_DIR} ${ABS_FIL} DEPENDS ${ABS_FIL} protoc) endforeach() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b5030da8e7..02e4f7c477 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -18,7 +18,7 @@ SET(COPY_PADDLE_MASTER "") if(WITH_GOLANG) SET(COPY_PADDLE_MASTER "copy_paddle_master") add_custom_command(TARGET ${COPY_PADDLE_MASTER} - COMMAND cp ${paddle_master_LIB_PATH} ${PROJ_ROOT}/python/paddle/v2/master/ + COMMAND cp ${paddle_master_LIB_PATH} ${PADDLE_SOURCE_DIR}/python/paddle/v2/master/ ) add_dependencies(copy_paddle_master paddle_master) endif(WITH_GOLANG) @@ -27,10 +27,10 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) -add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so - COMMAND cmake -E copy $ ${PROJ_ROOT}/python/paddle/v2/framework/core.so +add_custom_command(OUTPUT ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so + COMMAND cmake -E copy $ ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so DEPENDS paddle_pybind) -add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) +add_custom_target(copy_paddle_pybind ALL DEPENDS ${PADDLE_SOURCE_DIR}/python/paddle/v2/framework/core.so) add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp diff --git a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt 
b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt index 6c860fd497..580aef935b 100644 --- a/python/paddle/trainer_config_helpers/tests/CMakeLists.txt +++ b/python/paddle/trainer_config_helpers/tests/CMakeLists.txt @@ -1,17 +1,17 @@ #################### test_config_parser ######################### add_test(NAME layers_test - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/layers_test.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/layers_test.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_test(NAME test_reset_hook - COMMAND ${PROJ_ROOT}/paddle/.set_python_path.sh -d ${PROJ_ROOT}/python/ - ${PYTHON_EXECUTABLE} ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py - WORKING_DIRECTORY ${PROJ_ROOT}/python/paddle) + COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/ + ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/test_reset_hook.py + WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/python/paddle) add_paddle_exe(protobuf_equal ProtobufEqualMain.cpp) add_test(NAME test_layerHelpers COMMAND - ${PROJ_ROOT}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} + ${PADDLE_SOURCE_DIR}/python/paddle/trainer_config_helpers/tests/configs/run_tests.sh ${PYTHON_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/protobuf_equal ) diff --git a/python/setup.py.in b/python/setup.py.in index 38f0a503be..4110c98318 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -45,14 +45,14 @@ setup(name='paddlepaddle', '': '${CMAKE_CURRENT_SOURCE_DIR}', # The paddle.v2.framework.proto will be generated while compiling. # So that package points to other directory. 
- 'paddle.v2.framework.proto': '${PROJ_BINARY_ROOT}/paddle/framework', - 'py_paddle': '${PROJ_ROOT}/paddle/py_paddle' + 'paddle.v2.framework.proto': '${PADDLE_BINARY_DIR}/paddle/framework', + 'py_paddle': '${PADDLE_SOURCE_DIR}/paddle/py_paddle' }, - scripts=['${PROJ_BINARY_ROOT}/paddle/scripts/paddle'], + scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'], distclass=BinaryDistribution, data_files=[('/usr/local/opt/paddle/bin', - ['${PROJ_BINARY_ROOT}/paddle/scripts/paddle_usage', - '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_trainer', - '${PROJ_BINARY_ROOT}/paddle/trainer/paddle_merge_model', - '${PROJ_BINARY_ROOT}/paddle/pserver/paddle_pserver_main'])] + ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage', + '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer', + '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model', + '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])] ) From c304e02813e0628acfbce0fb21239cca931483ca Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 10 Aug 2017 12:31:06 +0800 Subject: [PATCH 31/55] fix py_padde test --- CMakeLists.txt | 2 ++ cmake/generic.cmake | 2 +- python/CMakeLists.txt | 10 +++------- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..89e1fec566 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -164,10 +164,12 @@ if(WITH_GOLANG) add_subdirectory(go) endif(WITH_GOLANG) +set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) endif() + if(WITH_DOC) add_subdirectory(doc) endif() diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 9f907a9dc2..951642e70b 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PADDLE_PYTHON_LIB_DIR} + 
COMMAND env PYTHONPATH=${PROJ_ROOT}/paddle:${PADDLE_PYTHON_BUILD_DIR}/lib python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fc8c6f6a42..684691d240 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,7 +1,3 @@ -set(OUTPUT_DIR - "${CMAKE_CURRENT_BINARY_DIR}/build") - -set(PADDLE_PYTHON_LIB_DIR "${OUTPUT_DIR}/lib") file(GLOB TRAINER_PY_FILES . ./paddle/trainer/*.py) file(GLOB HELPERS_PY_FILES . ./paddle/trainer_config_helpers/*.py) @@ -35,13 +31,13 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/python/paddle/v2/framework/core.so add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/framework/core.so) -add_custom_command(OUTPUT ${OUTPUT_DIR}/.timestamp +add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - COMMAND ${CMAKE_COMMAND} -E touch ${OUTPUT_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS - ${OUTPUT_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) + ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel) set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) From b1a8a9f61adce81d71d312cbf2ff3c86e9f517e1 Mon Sep 17 00:00:00 2001 From: Superjom Date: Thu, 10 Aug 2017 12:46:57 +0800 Subject: [PATCH 32/55] tensor slight improve --- paddle/framework/tensor.h | 4 ++-- paddle/framework/tensor_impl.h | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/paddle/framework/tensor.h b/paddle/framework/tensor.h index b57958591f..cd1b4de426 100644 --- a/paddle/framework/tensor.h +++ b/paddle/framework/tensor.h @@ -79,11 +79,11 @@ class Tensor 
{ inline const DDim& dims() const; /*! Resize the dimensions of the memory block. */ - inline void Resize(const DDim& dims); + inline Tensor& Resize(const DDim& dims); /*! The internal of two tensors share the same memory block. */ template - inline void ShareDataWith(const Tensor& src); + inline Tensor& ShareDataWith(const Tensor& src); /** * @brief Copy the content of external tensor to a new place. diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 8d9bec6dc9..a212b5f54f 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -78,9 +78,10 @@ inline T* Tensor::mutable_data(platform::Place place) { } template -inline void Tensor::ShareDataWith(const Tensor& src) { +inline Tensor& Tensor::ShareDataWith(const Tensor& src) { src.check_memory_size(); *this = src; + return *this; } template @@ -136,7 +137,10 @@ inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const { return dst; } -inline void Tensor::Resize(const DDim& dims) { dims_ = dims; } +inline Tensor& Tensor::Resize(const DDim& dims) { + dims_ = dims; + return *this; +} inline const DDim& Tensor::dims() const { return dims_; } From dbf4035d0ab7f54b5d18e92539610fcd15a5cfdb Mon Sep 17 00:00:00 2001 From: Superjom Date: Thu, 10 Aug 2017 12:52:32 +0800 Subject: [PATCH 33/55] add a error message to tensor --- paddle/framework/tensor_impl.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index a212b5f54f..7d7263b899 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -23,9 +23,11 @@ template inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); - PADDLE_ENFORCE_GE(holder_->size(), product(dims_) * sizeof(T) + offset_, - "Tensor's dims_ is out of bound. 
Call Tensor::mutable_data " - "first to re-allocate memory."); + PADDLE_ENFORCE_GE( + holder_->size(), product(dims_) * sizeof(T) + offset_, + "Tensor's dims_ is out of bound. Call Tensor::mutable_data " + "first to re-allocate memory.\n" + "or maybe the required data-type mismatches the data already stored."); } template From a475a57d9ba2d70477ef072a0bcf7c3254b4afeb Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 10 Aug 2017 13:02:43 +0800 Subject: [PATCH 34/55] rename files and classes, use uppercase of Mkldnn and Cpu --- paddle/gserver/CMakeLists.txt | 8 +-- .../layers/{MkldnnBase.h => MKLDNNBase.h} | 26 ++++----- .../{MkldnnFcLayer.cpp => MKLDNNFcLayer.cpp} | 22 ++++---- .../{MkldnnFcLayer.h => MKLDNNFcLayer.h} | 12 ++--- .../layers/{MkldnnLayer.h => MKLDNNLayer.h} | 22 ++++---- paddle/gserver/tests/CMakeLists.txt | 8 +-- .../{MkldnnTester.cpp => MKLDNNTester.cpp} | 54 +++++++++---------- .../tests/{MkldnnTester.h => MKLDNNTester.h} | 8 +-- .../{test_Mkldnn.cpp => test_MKLDNN.cpp} | 6 +-- 9 files changed, 83 insertions(+), 83 deletions(-) rename paddle/gserver/layers/{MkldnnBase.h => MKLDNNBase.h} (77%) rename paddle/gserver/layers/{MkldnnFcLayer.cpp => MKLDNNFcLayer.cpp} (94%) rename paddle/gserver/layers/{MkldnnFcLayer.h => MKLDNNFcLayer.h} (86%) rename paddle/gserver/layers/{MkldnnLayer.h => MKLDNNLayer.h} (88%) rename paddle/gserver/tests/{MkldnnTester.cpp => MKLDNNTester.cpp} (89%) rename paddle/gserver/tests/{MkldnnTester.h => MKLDNNTester.h} (95%) rename paddle/gserver/tests/{test_Mkldnn.cpp => test_MKLDNN.cpp} (96%) diff --git a/paddle/gserver/CMakeLists.txt b/paddle/gserver/CMakeLists.txt index 1305d5438a..62cff9361c 100644 --- a/paddle/gserver/CMakeLists.txt +++ b/paddle/gserver/CMakeLists.txt @@ -25,13 +25,13 @@ filter_test(GSERVER_HEADER) filter_test(GSERVER_SOURCES) if(NOT WITH_MKLDNN) - file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "Mkldnn*.h") - file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" 
"Mkldnn*.cpp") + file(GLOB_RECURSE DNN_HEADER RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.h") + file(GLOB_RECURSE DNN_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "MKLDNN*.cpp") list(REMOVE_ITEM GSERVER_HEADER ${DNN_HEADER}) list(REMOVE_ITEM GSERVER_SOURCES ${DNN_SOURCES}) - message(STATUS "Skip compiling with Mkldnnlayers and MkldnnActivations") + message(STATUS "Skip compiling with MKLDNNLayers and MKLDNNActivations") else() - message(STATUS "Compile with Mkldnnlayers and MkldnnActivations") + message(STATUS "Compile with MKLDNNLayers and MKLDNNActivations") endif() if(NOT WITH_GPU) diff --git a/paddle/gserver/layers/MkldnnBase.h b/paddle/gserver/layers/MKLDNNBase.h similarity index 77% rename from paddle/gserver/layers/MkldnnBase.h rename to paddle/gserver/layers/MKLDNNBase.h index 63fd67a850..4c0234e7b3 100644 --- a/paddle/gserver/layers/MkldnnBase.h +++ b/paddle/gserver/layers/MKLDNNBase.h @@ -30,26 +30,26 @@ typedef enum { * @brief MKLDNN CPU engine. * */ -class CpuEngine { +class CPUEngine { public: - static CpuEngine& Instance() { + static CPUEngine& Instance() { // Thread-safe in C++11. 
- static CpuEngine myInstance; + static CPUEngine myInstance; return myInstance; } // Disallow copy or move - CpuEngine(const CpuEngine&) = delete; // Copy constructor - CpuEngine(CpuEngine&&) = delete; // Move constructor - CpuEngine& operator=(const CpuEngine&) = delete; // Copy assignment - CpuEngine& operator=(CpuEngine&&) = delete; // Move assignment + CPUEngine(const CPUEngine&) = delete; // Copy constructor + CPUEngine(CPUEngine&&) = delete; // Move constructor + CPUEngine& operator=(const CPUEngine&) = delete; // Copy assignment + CPUEngine& operator=(CPUEngine&&) = delete; // Move assignment mkldnn::engine& getEngine() { return cpuEngine_; } protected: - CpuEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} - // CpuEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} - ~CpuEngine() {} + CPUEngine() : cpuEngine_(mkldnn::engine::cpu, 0) {} + // CPUEngine() : cpuEngine_(mkldnn::engine::cpu_lazy, 0) {} + ~CPUEngine() {} private: mkldnn::engine cpuEngine_; @@ -59,11 +59,11 @@ private: * @brief MKLDNN Stream. * */ -class MkldnnStream { +class MKLDNNStream { public: - MkldnnStream() : ready_(false) { resetState(); } + MKLDNNStream() : ready_(false) { resetState(); } - virtual ~MkldnnStream() {} + virtual ~MKLDNNStream() {} /** * @brief Submit stream diff --git a/paddle/gserver/layers/MkldnnFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp similarity index 94% rename from paddle/gserver/layers/MkldnnFcLayer.cpp rename to paddle/gserver/layers/MKLDNNFcLayer.cpp index f89db169ef..30f567eaf8 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "MkldnnFcLayer.h" +#include "MKLDNNFcLayer.h" #include "paddle/utils/Logging.h" #include "paddle/utils/Stat.h" @@ -24,11 +24,11 @@ typedef inner_product_backward_data fc_bwdData; namespace paddle { -REGISTER_LAYER(mkldnn_fc, MkldnnFcLayer); +REGISTER_LAYER(mkldnn_fc, MKLDNNFcLayer); -bool MkldnnFcLayer::init(const LayerMap& layerMap, +bool MKLDNNFcLayer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) { - if (!MkldnnLayer::init(layerMap, parameterMap)) { + if (!MKLDNNLayer::init(layerMap, parameterMap)) { return false; } @@ -56,7 +56,7 @@ bool MkldnnFcLayer::init(const LayerMap& layerMap, return true; } -void MkldnnFcLayer::convertWeightsFromPaddle() { +void MKLDNNFcLayer::convertWeightsFromPaddle() { if (FLAGS_use_mkldnn_wgt) { return; } @@ -81,7 +81,7 @@ void MkldnnFcLayer::convertWeightsFromPaddle() { hasInitedWgt_ = true; } -void MkldnnFcLayer::convertWeightsToPaddle() { +void MKLDNNFcLayer::convertWeightsToPaddle() { MatrixPtr dnnWgt = weight_->getW(); MatrixPtr paddleWgt; dnnWgt->transpose(paddleWgt, true); @@ -92,7 +92,7 @@ void MkldnnFcLayer::convertWeightsToPaddle() { dnnWgtT->copyFrom(*paddleWgt); } -void MkldnnFcLayer::reshape() { +void MKLDNNFcLayer::reshape() { const Argument& input = getInput(0); int batchSize = input.getBatchSize(); if (bs_ == batchSize) { @@ -129,7 +129,7 @@ void MkldnnFcLayer::reshape() { convertWeightsFromPaddle(); } -void MkldnnFcLayer::resetFwd() { +void MKLDNNFcLayer::resetFwd() { bool hasBias = biases_ && biases_->getW(); real* iData = getInputValue(0)->getData(); real* oData = getOutputValue()->getData(); @@ -166,7 +166,7 @@ void MkldnnFcLayer::resetFwd() { pipelineFwd_.push_back(*fwd_); } -void MkldnnFcLayer::resetBwd() { +void MKLDNNFcLayer::resetBwd() { if (!needResetBwd_) { return; } @@ -231,7 +231,7 @@ void MkldnnFcLayer::resetBwd() { pipelineBwd_.push_back(*bwdData_); } -void MkldnnFcLayer::forward(PassType passType) { +void MKLDNNFcLayer::forward(PassType passType) { 
Layer::forward(passType); reshape(); @@ -253,7 +253,7 @@ void MkldnnFcLayer::forward(PassType passType) { } } -void MkldnnFcLayer::backward(const UpdateCallback& callback) { +void MKLDNNFcLayer::backward(const UpdateCallback& callback) { /* Do derivation */ { REGISTER_TIMER_INFO("BpActTimer", getName().c_str()); backwardActivation(); diff --git a/paddle/gserver/layers/MkldnnFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h similarity index 86% rename from paddle/gserver/layers/MkldnnFcLayer.h rename to paddle/gserver/layers/MKLDNNFcLayer.h index c4c0fa1c41..dffae27d7b 100644 --- a/paddle/gserver/layers/MkldnnFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -14,17 +14,17 @@ limitations under the License. */ #pragma once -#include "MkldnnLayer.h" +#include "MKLDNNLayer.h" #include "mkldnn.hpp" namespace paddle { /** - * @brief A subclass of MkldnnLayer fc layer. + * @brief A subclass of MKLDNNLayer fc layer. * * The config file api is mkldnn_fc */ -class MkldnnFcLayer : public MkldnnLayer { +class MKLDNNFcLayer : public MKLDNNLayer { protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw @@ -37,10 +37,10 @@ protected: std::unique_ptr biases_; public: - explicit MkldnnFcLayer(const LayerConfig& config) - : MkldnnLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} + explicit MKLDNNFcLayer(const LayerConfig& config) + : MKLDNNLayer(config), hasInitedWgt_(false), hasSpatial_(true) {} - ~MkldnnFcLayer() {} + ~MKLDNNFcLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; diff --git a/paddle/gserver/layers/MkldnnLayer.h b/paddle/gserver/layers/MKLDNNLayer.h similarity index 88% rename from paddle/gserver/layers/MkldnnLayer.h rename to paddle/gserver/layers/MKLDNNLayer.h index 620bdfc984..63e29f447e 100644 --- a/paddle/gserver/layers/MkldnnLayer.h +++ b/paddle/gserver/layers/MKLDNNLayer.h @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include "Layer.h" -#include "MkldnnBase.h" +#include "MKLDNNBase.h" #include "mkldnn.hpp" DECLARE_bool(use_mkldnn); @@ -24,14 +24,14 @@ DECLARE_bool(use_mkldnn_wgt); namespace paddle { -class MkldnnLayer; -typedef std::shared_ptr MkldnnLayerPtr; +class MKLDNNLayer; +typedef std::shared_ptr MKLDNNLayerPtr; /** - * @brief Base class of Mkldnnlayer. + * @brief Base class of MKLDNNlayer. * */ -class MkldnnLayer : public Layer { +class MKLDNNLayer : public Layer { protected: // batch size int bs_; @@ -45,14 +45,14 @@ protected: // mkldnn engine, stream and primivtives mkldnn::engine engine_; - std::shared_ptr stream_; + std::shared_ptr stream_; std::shared_ptr fwd_; std::shared_ptr bwdWgt_; std::shared_ptr bwdData_; std::vector pipelineFwd_; std::vector pipelineBwd_; - // TODO(TJ): change below memory as MkldnnMatrixPtr type + // TODO(TJ): change below memory as MKLDNNMatrixPtr type std::shared_ptr inVal_; std::shared_ptr inGrad_; std::shared_ptr outVal_; @@ -63,7 +63,7 @@ protected: std::shared_ptr biasGrad_; public: - explicit MkldnnLayer(const LayerConfig& config) + explicit MKLDNNLayer(const LayerConfig& config) : Layer(config), bs_(0), ic_(0), @@ -79,7 +79,7 @@ public: bwdWgt_(nullptr), bwdData_(nullptr) {} - ~MkldnnLayer() {} + ~MKLDNNLayer() {} virtual bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) { @@ -90,8 +90,8 @@ public: CHECK(FLAGS_use_mkldnn) << "MkldnnLayers only support use_mkldnn." 
<< "Please set WITH_MKLDNN=ON " << "and set use_mkldnn=True"; - stream_.reset(new MkldnnStream()); - engine_ = CpuEngine::Instance().getEngine(); + stream_.reset(new MKLDNNStream()); + engine_ = CPUEngine::Instance().getEngine(); // TODO(TJ): deivecId return true; diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt index bcfc85aea0..ade5f633b4 100644 --- a/paddle/gserver/tests/CMakeLists.txt +++ b/paddle/gserver/tests/CMakeLists.txt @@ -20,11 +20,11 @@ add_test(NAME test_LayerGrad ########## test_Mkldnn layers and activations ########## if(WITH_MKLDNN) - add_unittest_without_exec(test_Mkldnn - test_Mkldnn.cpp - MkldnnTester.cpp + add_unittest_without_exec(test_MKLDNN + test_MKLDNN.cpp + MKLDNNTester.cpp LayerGradUtil.cpp) - add_test(NAME test_Mkldnn COMMAND test_Mkldnn) + add_test(NAME test_MKLDNN COMMAND test_MKLDNN) endif() ################ test_CRFLayerGrad #################### diff --git a/paddle/gserver/tests/MkldnnTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp similarity index 89% rename from paddle/gserver/tests/MkldnnTester.cpp rename to paddle/gserver/tests/MKLDNNTester.cpp index 9232e2fdcd..d91e4ed60c 100644 --- a/paddle/gserver/tests/MkldnnTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -12,14 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "MkldnnTester.h" -#include "paddle/gserver/layers/MkldnnBase.h" -#include "paddle/gserver/layers/MkldnnLayer.h" +#include "MKLDNNTester.h" +#include "paddle/gserver/layers/MKLDNNBase.h" +#include "paddle/gserver/layers/MKLDNNLayer.h" namespace paddle { // init data layer and test layer of both dnn and reference -void MkldnnTester::reset(const TestConfig& dnn, +void MKLDNNTester::reset(const TestConfig& dnn, const TestConfig& ref, size_t batchSize) { const bool trans = false; @@ -71,7 +71,7 @@ void MkldnnTester::reset(const TestConfig& dnn, setInputImgSize(); } -void MkldnnTester::setInputImgSize() { +void MKLDNNTester::setInputImgSize() { for (size_t n = 0; n < dataLayers_.size(); ++n) { for (size_t i = 0; i < dataLayers_[n].size(); ++i) { // TODO(TJ): fix me when concat and elewise ready @@ -82,7 +82,7 @@ void MkldnnTester::setInputImgSize() { } // init randome parameters of ref, and copy to mkldnn -void MkldnnTester::randomWgtDatas() { +void MKLDNNTester::randomWgtDatas() { EXPECT_EQ(parameters_[DNN].size(), parameters_[REF].size()); for (size_t i = 0; i < parameters_[REF].size(); ++i) { const VectorPtr& dnnValue = parameters_[DNN][i]->getBuf(PARAMETER_VALUE); @@ -96,7 +96,7 @@ void MkldnnTester::randomWgtDatas() { } // random botdata of ref layer and copy same to mkldnn -void MkldnnTester::randomBotDatas() { +void MKLDNNTester::randomBotDatas() { CHECK_EQ(dataLayers_.size(), NUM); for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { dataLayers_[REF][i]->getOutputValue()->randomizeUniform(); @@ -107,14 +107,14 @@ void MkldnnTester::randomBotDatas() { } } -void MkldnnTester::randomTopDiffs() { +void MKLDNNTester::randomTopDiffs() { refLayer_->getOutputGrad()->randomizeUniform(); dnnLayer_->getOutputGrad()->copyFrom(*(refLayer_->getOutputGrad())); VLOG(lvl_) << "Random dom Backward Input, TopDiff: "; printMatrix(refLayer_->getOutputGrad()); } -void MkldnnTester::checkForward() { +void MKLDNNTester::checkForward() { printTopDatas(); double delta = 
compareMatrix(testLayers_[DNN]->getOutputValue(), testLayers_[REF]->getOutputValue()); @@ -122,7 +122,7 @@ void MkldnnTester::checkForward() { EXPECT_LE(fabs(delta), eps_); } -void MkldnnTester::checkBackwardData() { +void MKLDNNTester::checkBackwardData() { const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); @@ -141,13 +141,13 @@ void MkldnnTester::checkBackwardData() { } } -void MkldnnTester::checkBackwardWgts() { +void MKLDNNTester::checkBackwardWgts() { CHECK_EQ(parameters_[DNN].size(), parameters_[REF].size()); vector dnnWgts; // used to temply save mkldnn weights saveWgt(parameters_[DNN], dnnWgts); - const MkldnnLayerPtr dnnlayer = - std::dynamic_pointer_cast(dnnLayer_); + const MKLDNNLayerPtr dnnlayer = + std::dynamic_pointer_cast(dnnLayer_); CHECK(dnnlayer); dnnlayer->convertWeightsToPaddle(); for (size_t i = 0; i < parameters_[DNN].size(); ++i) { @@ -166,7 +166,7 @@ void MkldnnTester::checkBackwardWgts() { restoreWgt(dnnWgts, parameters_[DNN]); } -void MkldnnTester::saveWgt(const vector& from, +void MKLDNNTester::saveWgt(const vector& from, vector& to) { const bool useGpu = false; to.resize(from.size()); @@ -177,7 +177,7 @@ void MkldnnTester::saveWgt(const vector& from, } } -void MkldnnTester::restoreWgt(const vector& from, +void MKLDNNTester::restoreWgt(const vector& from, vector& to) { CHECK_EQ(from.size(), to.size()); for (size_t i = 0; i < from.size(); ++i) { @@ -187,7 +187,7 @@ void MkldnnTester::restoreWgt(const vector& from, } // clear parameters grad -void MkldnnTester::clearWgtDiffs() { +void MKLDNNTester::clearWgtDiffs() { for (size_t n = 0; n < parameters_.size(); ++n) { for (size_t i = 0; i < parameters_[n].size(); ++i) { const VectorPtr& grad = parameters_[n][i]->getBuf(PARAMETER_GRADIENT); @@ -198,7 +198,7 @@ void MkldnnTester::clearWgtDiffs() { } } -void MkldnnTester::clearBotDiffs() { +void 
MKLDNNTester::clearBotDiffs() { // dnn and ref for (size_t n = 0; n < dataLayers_.size(); ++n) { // all inputs layers @@ -208,7 +208,7 @@ void MkldnnTester::clearBotDiffs() { } } -void MkldnnTester::clearBotDiffs(int n) { +void MKLDNNTester::clearBotDiffs(int n) { CHECK_LT(n, NUM); // all inputs layers for (size_t i = 0; i < dataLayers_[n].size(); ++i) { @@ -216,13 +216,13 @@ void MkldnnTester::clearBotDiffs(int n) { } } -void MkldnnTester::clearTopDatas() { +void MKLDNNTester::clearTopDatas() { for (size_t i = 0; i < testLayers_.size(); ++i) { testLayers_[i]->getOutputValue()->zeroMem(); } } -void MkldnnTester::printTopDatas() { +void MKLDNNTester::printTopDatas() { if (!log_) { return; } @@ -233,7 +233,7 @@ void MkldnnTester::printTopDatas() { } } -void MkldnnTester::printMatrix(const MatrixPtr& m) { +void MKLDNNTester::printMatrix(const MatrixPtr& m) { if (!log_) { return; } @@ -243,7 +243,7 @@ void MkldnnTester::printMatrix(const MatrixPtr& m) { VLOG(lvl_) << std::endl << ostr.str(); } -void MkldnnTester::printVector(const VectorPtr& v) { +void MKLDNNTester::printVector(const VectorPtr& v) { if (!log_) { return; } @@ -253,7 +253,7 @@ void MkldnnTester::printVector(const VectorPtr& v) { VLOG(lvl_) << std::endl << ostr.str(); } -double MkldnnTester::getDelta(const real* d1, +double MKLDNNTester::getDelta(const real* d1, const real* d2, size_t len, const float failRate, @@ -280,17 +280,17 @@ double MkldnnTester::getDelta(const real* d1, return (failCnt / (float)len) > failRate ? 
maxOut : delta / sum; } -double MkldnnTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { +double MKLDNNTester::compareMatrix(const MatrixPtr& m1, const MatrixPtr& m2) { CHECK_EQ(m1->getElementCnt(), m2->getElementCnt()); return getDelta(m1->getData(), m2->getData(), m1->getElementCnt()); } -double MkldnnTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { +double MKLDNNTester::compareVector(const VectorPtr& v1, const VectorPtr& v2) { CHECK_EQ(v1->getSize(), v2->getSize()); return getDelta(v1->getData(), v2->getData(), v1->getSize()); } -void MkldnnTester::runOnce() { +void MKLDNNTester::runOnce() { // test forward randomBotDatas(); dnnLayer_->forward(PASS_TRAIN); @@ -310,7 +310,7 @@ void MkldnnTester::runOnce() { clearBotDiffs(REF); } -void MkldnnTester::run(const TestConfig& dnn, +void MKLDNNTester::run(const TestConfig& dnn, const TestConfig& ref, size_t batchSize, size_t inputImgH, diff --git a/paddle/gserver/tests/MkldnnTester.h b/paddle/gserver/tests/MKLDNNTester.h similarity index 95% rename from paddle/gserver/tests/MkldnnTester.h rename to paddle/gserver/tests/MKLDNNTester.h index 7d1db870d1..d21f92d426 100644 --- a/paddle/gserver/tests/MkldnnTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -17,7 +17,7 @@ limitations under the License. 
*/ #include #include #include "LayerGradUtil.h" -#include "paddle/gserver/layers/MkldnnBase.h" +#include "paddle/gserver/layers/MKLDNNBase.h" namespace paddle { @@ -25,7 +25,7 @@ namespace paddle { * @brief test the functionality of Mkldnnlayers * refer to paddle original function */ -class MkldnnTester { +class MKLDNNTester { enum { DNN = 0, REF = 1, @@ -54,14 +54,14 @@ protected: size_t ih_, iw_; public: - explicit MkldnnTester(size_t iter = 3, float epsilon = 1e-4) { + explicit MKLDNNTester(size_t iter = 3, float epsilon = 1e-4) { iter_ = iter; eps_ = epsilon; log_ = false; lvl_ = MKLDNN_ALL; } - ~MkldnnTester() {} + ~MKLDNNTester() {} public: void run(const TestConfig& dnn, diff --git a/paddle/gserver/tests/test_Mkldnn.cpp b/paddle/gserver/tests/test_MKLDNN.cpp similarity index 96% rename from paddle/gserver/tests/test_Mkldnn.cpp rename to paddle/gserver/tests/test_MKLDNN.cpp index 8e4a8595d3..e1d2270df2 100644 --- a/paddle/gserver/tests/test_Mkldnn.cpp +++ b/paddle/gserver/tests/test_MKLDNN.cpp @@ -15,7 +15,7 @@ limitations under the License. 
*/ #include #include #include -#include "MkldnnTester.h" +#include "MKLDNNTester.h" #include "ModelConfig.pb.h" using namespace paddle; // NOLINT @@ -43,7 +43,7 @@ void testFcLayer(const testFCDesc& pm) { /* size of weight= */ size_t(pm.oc * pm.ic * pm.ih * pm.iw)}); cfg.layerConfig.add_inputs(); - MkldnnTester tester; + MKLDNNTester tester; for (auto biasSize : {pm.oc, 0}) { cfg.biasSize = biasSize; TestConfig ref = cfg; @@ -54,7 +54,7 @@ void testFcLayer(const testFCDesc& pm) { } } -TEST(MkldnnLayer, fcLayer) { +TEST(MKLDNNLayer, FcLayer) { testFcLayer({/*bs*/ 2, /*ic*/ 2, /*oc*/ 3, /*ih*/ 1, /*iw*/ 1}); testFcLayer({/*bs*/ 3, /*ic*/ 7, /*oc*/ 19, /*ih*/ 1, /*iw*/ 1}); testFcLayer({/*bs*/ 8, /*ic*/ 16, /*oc*/ 32, /*ih*/ 13, /*iw*/ 13}); From 2d4c66d4b2bc723d2404d650b1adbd3b76a42b32 Mon Sep 17 00:00:00 2001 From: tensor-tang Date: Thu, 10 Aug 2017 13:42:27 +0800 Subject: [PATCH 35/55] add comments and todo lists --- paddle/gserver/layers/MKLDNNFcLayer.h | 3 +++ paddle/gserver/tests/MKLDNNTester.cpp | 12 +++++++----- paddle/gserver/tests/MKLDNNTester.h | 9 +++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index dffae27d7b..7954852a23 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -29,7 +29,10 @@ protected: // input layer size, can not be change after init size_t iLayerSize_; // == ic * ih * iw + // if has already init the weight bool hasInitedWgt_; + + // if input layer has image size info (ih>1 && iw>1) bool hasSpatial_; // fc weight and bias diff --git a/paddle/gserver/tests/MKLDNNTester.cpp b/paddle/gserver/tests/MKLDNNTester.cpp index d91e4ed60c..99c8c4948c 100644 --- a/paddle/gserver/tests/MKLDNNTester.cpp +++ b/paddle/gserver/tests/MKLDNNTester.cpp @@ -123,7 +123,8 @@ void MKLDNNTester::checkForward() { } void MKLDNNTester::checkBackwardData() { - const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; 
+ // TODO(TJ): uncomment me when batch norm ready + // const bool isBN = dnnLayer_->getType() == "mkldnn_batch_norm"; for (size_t i = 0; i < dataLayers_[DNN].size(); ++i) { const MatrixPtr& dnnDiff = dataLayers_[DNN][i]->getOutputGrad(); const MatrixPtr& refDiff = dataLayers_[REF][i]->getOutputGrad(); @@ -134,10 +135,11 @@ void MKLDNNTester::checkBackwardData() { double delta = compareMatrix(dnnDiff, refDiff); EXPECT_LE(fabs(delta), eps_); - if (isBN) { - // the other two inputs in batch norm are for moving mean and var - break; - } + // TODO(TJ): uncomment me when batch norm ready + // if (isBN) { + // // the other two inputs in batch norm are for moving mean and var + // break; + // } } } diff --git a/paddle/gserver/tests/MKLDNNTester.h b/paddle/gserver/tests/MKLDNNTester.h index d21f92d426..522eeaf24b 100644 --- a/paddle/gserver/tests/MKLDNNTester.h +++ b/paddle/gserver/tests/MKLDNNTester.h @@ -27,9 +27,9 @@ namespace paddle { */ class MKLDNNTester { enum { - DNN = 0, - REF = 1, - NUM = 2, + DNN = 0, // MKLDNN layer + REF = 1, // Reference layer + NUM = 2, // Number of total }; protected: @@ -107,7 +107,8 @@ private: * Get delta percent * if many(>failRate) wrong(abs(dnn-ref)/abs(ref)>thres) points return the * max(diff/ref) - * else return sum(abs(a-b)) / sum(abs(b)) should smaller than eps + * else return sum(abs(a-b)) / sum(abs(b)) + * The return value should smaller than eps when passing. */ double getDelta(const real* d1, const real* d2, From fb5cd7f8238be3503290b35597dd3b60a8e33b17 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 06:35:11 +0000 Subject: [PATCH 36/55] Refine the error logs. 
--- .../examples/model_inference/common/common.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/paddle/capi/examples/model_inference/common/common.h b/paddle/capi/examples/model_inference/common/common.h index a78522e4a7..e32f2f9836 100644 --- a/paddle/capi/examples/model_inference/common/common.h +++ b/paddle/capi/examples/model_inference/common/common.h @@ -3,18 +3,21 @@ #include #include -#define CHECK(stmt) \ - do { \ - paddle_error __err__ = stmt; \ - if (__err__ != kPD_NO_ERROR) { \ - fprintf(stderr, "Invoke paddle error %d \n" #stmt, __err__); \ - exit(__err__); \ - } \ +#define CHECK(stmt) \ + do { \ + paddle_error __err__ = stmt; \ + if (__err__ != kPD_NO_ERROR) { \ + fprintf(stderr, "Invoke paddle error %d in " #stmt "\n", __err__); \ + exit(__err__); \ + } \ } while (0) void* read_config(const char* filename, long* size) { FILE* file = fopen(filename, "r"); - if (file == NULL) return NULL; + if (file == NULL) { + fprintf(stderr, "Open %s error\n", filename); + return NULL; + } fseek(file, 0L, SEEK_END); *size = ftell(file); fseek(file, 0L, SEEK_SET); From c7a247b7afe2498be4442e84d394a73b076bfcff Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 06:56:18 +0000 Subject: [PATCH 37/55] Support to load parameters from buffer in c-api. 
--- paddle/capi/Arguments.cpp | 12 ++++++ paddle/capi/arguments.h | 13 ++++++ paddle/capi/gradient_machine.cpp | 9 ++++ paddle/capi/gradient_machine.h | 9 ++++ .../gradientmachines/GradientMachine.cpp | 43 +++++++++++++++++++ .../gradientmachines/GradientMachine.h | 2 + .../gradientmachines/NeuralNetwork.cpp | 2 + paddle/parameter/Parameter.cpp | 40 +++++++++-------- paddle/parameter/Parameter.h | 5 +++ 9 files changed, 117 insertions(+), 18 deletions(-) diff --git a/paddle/capi/Arguments.cpp b/paddle/capi/Arguments.cpp index 8b81ec69e6..1ec403077e 100644 --- a/paddle/capi/Arguments.cpp +++ b/paddle/capi/Arguments.cpp @@ -90,6 +90,18 @@ paddle_error paddle_arguments_set_ids(paddle_arguments args, return kPD_NO_ERROR; } +paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth) { + if (args == nullptr) return kPD_NULLPTR; + auto a = castArg(args); + if (ID >= a->args.size()) return kPD_OUT_OF_RANGE; + a->args[ID].setFrameHeight(frameHeight); + a->args[ID].setFrameWidth(frameWidth); + return kPD_NO_ERROR; +} + paddle_error paddle_arguments_set_sequence_start_pos(paddle_arguments args, uint64_t ID, uint32_t nestedLevel, diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index d71ea26a5d..ba49d692ad 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -111,6 +111,19 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, uint64_t ID, paddle_ivector ids); +/** + * @brief paddle_arguments_set_frame_shape Set the fram size of one argument + * in array, which index is `ID`. 
+ * @param [in] args arguments array + * @param [in] ID array index + * @param [out] ids integer vector pointer + * @return paddle_error + */ +PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, + uint64_t ID, + uint64_t frameHeight, + uint64_t frameWidth); + /** * @brief PDArgsSetSequenceStartPos Set sequence start position vector of one * argument in array, which index is `ID`. diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index 00f76e0152..e2d2d30ddc 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -68,6 +68,15 @@ paddle_error paddle_gradient_machine_load_parameter_from_disk( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_load_parameter_from_buffer( + paddle_gradient_machine machine, const char* buf, uint64_t length) { + auto m = cast(machine); + if (m == nullptr || buf == nullptr || m->machine == nullptr) + return kPD_NULLPTR; + m->machine->loadParameters(buf, length); + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine, paddle_arguments inArgs, paddle_arguments outArgs, diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index d7e2dd9bf8..2426839050 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -45,6 +45,15 @@ PD_API paddle_error paddle_gradient_machine_create_for_inference( PD_API paddle_error paddle_gradient_machine_load_parameter_from_disk( paddle_gradient_machine machine, const char* path); +/** + * @brief Load parameter from buffer. + * @param machine Gradient Machine. + * @param buffer containing all parameters. 
+ * @return paddle_error + */ +PD_API paddle_error paddle_gradient_machine_load_parameter_from_buffer( + paddle_gradient_machine machine, const char* buf, uint64_t length); + /** * @brief Forward a gradient machine * @param machine Gradient machine diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index b44e4dc202..b7678d9b2f 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -14,6 +14,7 @@ limitations under the License. */ #include "GradientMachine.h" +#include #include #include "paddle/utils/Logging.h" @@ -81,6 +82,48 @@ void GradientMachine::loadParameters(const std::string& dir) { } } +void GradientMachine::loadParameters(const char* buf, uint64_t length) { + LOG(INFO) << "Loading parameter from pre-load buffer"; + + CHECK_NOTNULL(buf); + CHECK_GE(length, static_cast(sizeof(uint64_t))); + + uint64_t numFiles = 0; + memcpy(&numFiles, buf, sizeof(uint64_t)); + uint64_t position = sizeof(uint64_t); + LOG(INFO) << "numFiles: " << numFiles << ", position: " << position; + + std::map offsets; + std::map lengths; + for (uint64_t i = 0; i < numFiles; i++) { + std::string filename(buf + position); + position += filename.size() + 1; + LOG(INFO) << "filename: " << filename << ", position: " << position; + uint64_t size = 0; + memcpy(&size, buf + position, sizeof(uint64_t)); + position += sizeof(uint64_t); + offsets[filename] = const_cast(buf + position); + lengths[filename] = size; + position += size; + CHECK_GE(length, position); + } + + CHECK_GE(offsets.size(), parameters_.size()); + + for (auto& para : parameters_) { + std::string filename = para->getName(); + if (para->isFullSize()) { + if (offsets.end() == offsets.find(filename)) { + para->loadMiss(filename); + } else { + std::istringstream stream( + std::string(offsets[filename], lengths[filename])); + para->load(stream); + } + } + } +} + void 
GradientMachine::randParameters() { LOG(INFO) << "Initing parameters.."; diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index f9c82a2bef..081518a9d2 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -221,6 +221,8 @@ public: void loadParameters(const std::string& dir); + void loadParameters(const char* buf, uint64_t length); + void randParameters(); virtual void getStats(real& cost, int64_t& numProcessed) { diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index cfa80a8936..148296d20b 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -24,6 +24,8 @@ limitations under the License. */ #include "paddle/gserver/layers/AgentLayer.h" #include "paddle/utils/Stat.h" +#include + namespace paddle { void parameterInitNN(int paramId, Parameter* para, diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index ebe36d4937..80dbb73a7d 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -314,27 +314,31 @@ bool Parameter::save(std::ostream& s) const { /** * Load parameter value from a file */ +bool Parameter::loadMiss(const std::string& filename) { + LOG(INFO) << "missing parameters [" << filename << "] while loading model."; + if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { + LOG(FATAL) << getName() << " missing, not allowed."; + return false; + } + if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to random."; + randomize(); + return true; + } + if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to zero."; + zeroMem(); + return true; + } + LOG(FATAL) << "unsupported load_missing_parameter_strategy: " + << 
FLAGS_load_missing_parameter_strategy; + return false; +} + bool Parameter::load(const std::string& filename) { std::ifstream fs(filename, std::ios_base::binary); if (!fs) { - LOG(INFO) << "missing parameters [" << filename << "] while loading model."; - if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { - LOG(FATAL) << getName() << " missing, not allowed."; - return false; - } - if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to random."; - randomize(); - return true; - } - if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to zero."; - zeroMem(); - return true; - } - LOG(FATAL) << "unsupported load_missing_parameter_strategy: " - << FLAGS_load_missing_parameter_strategy; - return false; + loadMiss(filename); } return load(fs); } diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 0bac76f068..21932f6b6e 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -201,6 +201,11 @@ public: */ bool save(std::ostream& s) const; + /** + * Fill parameter when file is missed + */ + bool loadMiss(const std::string& filename); + /** * Load parameter value from a file */ From d111815c3257ba0a846b52255d5b791954cae75f Mon Sep 17 00:00:00 2001 From: Luo Tao Date: Thu, 10 Aug 2017 15:29:18 +0800 Subject: [PATCH 38/55] turn off with_mklml and with_mkldnn to fix ImportError of libmklml_intel.so --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b174831109..c7d743e193 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,8 @@ include(simd) ################################ Configurations ####################################### option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND}) option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND}) -option(WITH_MKLDNN "Compile PaddlePaddle with 
mkl-dnn support." ${AVX_FOUND}) -option(WITH_MKLML "Compile PaddlePaddle with mklml package." ${AVX_FOUND}) +option(WITH_MKLDNN "Compile PaddlePaddle with mkl-dnn support." OFF) +option(WITH_MKLML "Compile PaddlePaddle with mklml package." OFF) option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON) option(WITH_TESTING "Compile PaddlePaddle with unit testing" ON) option(WITH_SWIG_PY "Compile PaddlePaddle with inference api" ON) From 2df628af884f78f9019e28dc1f242264581bbdb4 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 15:47:30 +0800 Subject: [PATCH 39/55] Fix the bug between nvcc and boost Fix #3386 --- paddle/framework/attribute.h | 2 +- paddle/framework/ddim.h | 2 +- paddle/framework/operator.h | 2 +- paddle/platform/place.h | 2 +- paddle/platform/variant.h | 30 ++++++++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 paddle/platform/variant.h diff --git a/paddle/framework/attribute.h b/paddle/framework/attribute.h index 3a5820e9c6..49a62bedb6 100644 --- a/paddle/framework/attribute.h +++ b/paddle/framework/attribute.h @@ -14,7 +14,6 @@ limitations under the License. */ #pragma once -#include #include #include #include @@ -24,6 +23,7 @@ limitations under the License. */ #include "paddle/framework/attribute.pb.h" #include "paddle/framework/op_desc.pb.h" #include "paddle/platform/enforce.h" +#include "paddle/platform/variant.h" namespace paddle { namespace framework { diff --git a/paddle/framework/ddim.h b/paddle/framework/ddim.h index 5aa5af0c19..b2d9fec047 100644 --- a/paddle/framework/ddim.h +++ b/paddle/framework/ddim.h @@ -14,12 +14,12 @@ limitations under the License. 
*/ #pragma once -#include #include #include #include #include "paddle/framework/dim.h" #include "paddle/platform/enforce.h" +#include "paddle/platform/variant.h" #include "unsupported/Eigen/CXX11/Tensor" namespace paddle { diff --git a/paddle/framework/operator.h b/paddle/framework/operator.h index 8949baf60e..50fc6d1013 100644 --- a/paddle/framework/operator.h +++ b/paddle/framework/operator.h @@ -15,7 +15,6 @@ limitations under the License. */ #pragma once #include -#include #include #include #include @@ -27,6 +26,7 @@ limitations under the License. */ #include "paddle/framework/tensor.h" #include "paddle/platform/device_context.h" #include "paddle/platform/place.h" +#include "paddle/platform/variant.h" #include "paddle/utils/Error.h" namespace paddle { diff --git a/paddle/platform/place.h b/paddle/platform/place.h index a82e8c942f..1117476bb3 100644 --- a/paddle/platform/place.h +++ b/paddle/platform/place.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once -#include #include +#include "paddle/platform/variant.h" namespace paddle { namespace platform { diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h new file mode 100644 index 0000000000..c5bc45ca72 --- /dev/null +++ b/paddle/platform/variant.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/ + +#pragma once + +#include + +#ifndef PADDLE_ONLY_CPU + +// Because boost's variadic templates has bug on nvcc, boost will disable +// BOOST_NO_CXX11_VARIADIC_TEMPLATES when GPU enabled on nvcc. +// Disable BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same +// function symbols. +#ifndef BOOST_NO_CXX11_VARIADIC_TEMPLATES +#define BOOST_NO_CXX11_VARIADIC_TEMPLATES +#endif +#endif + +#include From 14f98c88e87caa36589bb6276e4e74fd63ea6ccd Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 15:54:51 +0800 Subject: [PATCH 40/55] Polish comments on paddle/platform/variant.h --- paddle/platform/variant.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/platform/variant.h b/paddle/platform/variant.h index c5bc45ca72..c2257af1b5 100644 --- a/paddle/platform/variant.h +++ b/paddle/platform/variant.h @@ -19,9 +19,11 @@ #ifndef PADDLE_ONLY_CPU // Because boost's variadic templates has bug on nvcc, boost will disable -// BOOST_NO_CXX11_VARIADIC_TEMPLATES when GPU enabled on nvcc. -// Disable BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same +// variadic template support when GPU enabled on nvcc. +// Define BOOST_NO_CXX11_VARIADIC_TEMPLATES on gcc/clang to generate same // function symbols. +// +// https://github.com/PaddlePaddle/Paddle/issues/3386 #ifndef BOOST_NO_CXX11_VARIADIC_TEMPLATES #define BOOST_NO_CXX11_VARIADIC_TEMPLATES #endif From 1505e46be89e9a717ff5f206bfc48c97682c0cef Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 08:01:48 +0000 Subject: [PATCH 41/55] Refine the comments of c-api function, paddle_arguments_set_frame_shape. 
--- paddle/capi/arguments.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/paddle/capi/arguments.h b/paddle/capi/arguments.h index ba49d692ad..7c32524a00 100644 --- a/paddle/capi/arguments.h +++ b/paddle/capi/arguments.h @@ -116,7 +116,8 @@ PD_API paddle_error paddle_arguments_set_ids(paddle_arguments args, * in array, which index is `ID`. * @param [in] args arguments array * @param [in] ID array index - * @param [out] ids integer vector pointer + * @param [in] frameHeight maximum height of input images + * @param [in] frameWidth maximum width of input images * @return paddle_error */ PD_API paddle_error paddle_arguments_set_frame_shape(paddle_arguments args, From c326aae0cf9b975960a5e657ce4174ea795b78bb Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 16:12:22 +0800 Subject: [PATCH 42/55] Fix code style in gaussian_random_op.cu --- paddle/operators/gaussian_random_op.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 54e4ae5d2b..0dd26f6df8 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -49,4 +49,4 @@ class GaussianRandomKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); \ No newline at end of file +REGISTER_OP_GPU_KERNEL(gaussian_random, ops::GaussianRandomKernel); From 459111020111b3159c04045cc48317cd418fe039 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 16:54:22 +0800 Subject: [PATCH 43/55] Fix gaussian_random_op compile error * Should always use `dynload::` for cuda function. * Fix cublas.h without DSO load. 
--- paddle/operators/gaussian_random_op.cu | 4 ++-- paddle/platform/dynload/cublas.h | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 54e4ae5d2b..c04637ae3e 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -40,8 +40,8 @@ class GaussianRandomKernel : public framework::OpKernel { &g, CURAND_RNG_PSEUDO_DEFAULT)); PADDLE_ENFORCE( platform::dynload::curandSetPseudoRandomGeneratorSeed(g, seed)); - curandGenerateNormal(g, data, framework::product(tensor->dims()), mean, - std); + platform::dynload::curandGenerateNormal( + g, data, framework::product(tensor->dims()), mean, std); } }; diff --git a/paddle/platform/dynload/cublas.h b/paddle/platform/dynload/cublas.h index c44b7240a8..aad8097dbb 100644 --- a/paddle/platform/dynload/cublas.h +++ b/paddle/platform/dynload/cublas.h @@ -48,13 +48,13 @@ extern void *cublas_dso_handle; }; \ extern DynLoad__##__name __name #else -#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ - struct DynLoad__##__name { \ - inline template \ - cublasStatus_t operator()(Args... args) { \ - return __name(args...); \ - } \ - }; \ +#define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ + struct DynLoad__##__name { \ + template \ + inline cublasStatus_t operator()(Args... 
args) { \ + return __name(args...); \ + } \ + }; \ extern DynLoad__##__name __name #endif From 3f34ff8f9f92c8786312ab89b0912b39074790d4 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 16:58:24 +0800 Subject: [PATCH 44/55] Comment test_gaussian_random_op for hotfix --- python/paddle/v2/framework/tests/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index f6850e0651..55ed724e8f 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -23,6 +23,5 @@ py_test(test_rowwise_add_op SRCS test_rowwise_add_op.py) py_test(test_default_scope_funcs SRCS test_default_scope_funcs.py) py_test(test_operator SRCS test_operator.py) - -py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) +# py_test(test_gaussian_random_op SRCS test_gaussian_random_op.py) py_test(test_uniform_random_op SRCS test_uniform_random_op.py) From 3df8ee1ff778abf341b6391bec9b6a95001e004d Mon Sep 17 00:00:00 2001 From: qiaolongfei Date: Thu, 10 Aug 2017 18:22:41 +0800 Subject: [PATCH 45/55] use lib-python as python test dir --- cmake/generic.cmake | 2 +- python/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 951642e70b..d2aab938d4 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -411,7 +411,7 @@ function(py_test TARGET_NAME) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) add_test(NAME ${TARGET_NAME} - COMMAND env PYTHONPATH=${PROJ_ROOT}/paddle:${PADDLE_PYTHON_BUILD_DIR}/lib + COMMAND env PYTHONPATH=${PADDLE_PYTHON_BUILD_DIR}/lib-python python2 ${py_test_SRCS} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 684691d240..0deff5ff08 100644 --- 
a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -34,6 +34,8 @@ add_custom_target(copy_paddle_pybind ALL DEPENDS ${PROJ_ROOT}/python/paddle/v2/f add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND ${CMAKE_COMMAND} -E touch ${PADDLE_PYTHON_BUILD_DIR}/.timestamp + COMMAND ${CMAKE_COMMAND} -E remove_directory ${PADDLE_PYTHON_BUILD_DIR}/lib-python + COMMAND ${CMAKE_COMMAND} -E copy_directory ${PADDLE_PYTHON_BUILD_DIR}/lib* ${PADDLE_PYTHON_BUILD_DIR}/lib-python DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER}) add_custom_target(paddle_python ALL DEPENDS From d299528829a2ad022b11e7f05c7df1d585834372 Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Thu, 10 Aug 2017 18:39:02 +0800 Subject: [PATCH 46/55] Add curandGenerateNormal to curand.h --- paddle/platform/dynload/curand.h | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/platform/dynload/curand.h b/paddle/platform/dynload/curand.h index d8c46bc41e..7bfe0778c7 100644 --- a/paddle/platform/dynload/curand.h +++ b/paddle/platform/dynload/curand.h @@ -55,6 +55,7 @@ extern void *curand_dso_handle; __macro(curandSetPseudoRandomGeneratorSeed); \ __macro(curandGenerateUniform); \ __macro(curandGenerateUniformDouble); \ + __macro(curandGenerateNormal); \ __macro(curandDestroyGenerator); CURAND_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CURAND_WRAP); From 9dccdd77a1a86b6cf08c66dfef4bfecd94944817 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 11:43:39 +0000 Subject: [PATCH 47/55] Add c-api interface, paddle_gradient_machine_create_for_inference_with_parameters, to create a gradient machine for inference using merged model with parameters which is genearted by `paddle merge_model`. 
--- cmake/flags.cmake | 10 +++++--- paddle/capi/gradient_machine.cpp | 25 +++++++++++++++++++ paddle/capi/gradient_machine.h | 12 +++++++++ .../gradientmachines/NeuralNetwork.cpp | 2 -- 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/cmake/flags.cmake b/cmake/flags.cmake index e26d8d9df3..b27eb71550 100644 --- a/cmake/flags.cmake +++ b/cmake/flags.cmake @@ -9,10 +9,12 @@ function(CheckCompilerCXX11Flag) if(${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 4.8) message(FATAL_ERROR "Unsupported GCC version. GCC >= 4.8 required.") endif() - # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. - # Use Debug mode instead for now. - if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) - set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + if(NOT ANDROID) + # TODO(qijun) gcc 4.9 or later versions raise SEGV due to the optimization problem. + # Use Debug mode instead for now. + if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.9 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 4.9) + set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "" FORCE) + endif() endif() elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # cmake >= 3.0 compiler id "AppleClang" on Mac OS X, otherwise "Clang" diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index e2d2d30ddc..f7ad30f3bf 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -54,6 +54,31 @@ paddle_error paddle_gradient_machine_create_for_inference( return kPD_NO_ERROR; } +paddle_error paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size) { + if (mergedModel == nullptr) return kPD_NULLPTR; + std::istringstream is(std::string(static_cast(mergedModel), size)); + int64_t modelConfigSize = 0; + is.read((char*)(&modelConfigSize), sizeof(modelConfigSize)); + std::string modelConfigProtobuf; + 
modelConfigProtobuf.resize(modelConfigSize); + is.read(&modelConfigProtobuf[0], modelConfigSize); + paddle::TrainerConfig config; + if (!config.ParseFromString(modelConfigProtobuf) || !config.IsInitialized()) { + return kPD_PROTOBUF_ERROR; + } + auto ptr = new paddle::capi::CGradientMachine(); + ptr->machine.reset(paddle::GradientMachine::create( + config.model_config(), CREATE_MODE_TESTING, {paddle::PARAMETER_VALUE})); + std::vector& parameters = ptr->machine->getParameters(); + for (auto& para : parameters) { + para->load(is); + } + + *machine = ptr; + return kPD_NO_ERROR; +} + paddle_error paddle_gradient_machine_destroy(paddle_gradient_machine machine) { delete cast(machine); return kPD_NO_ERROR; diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index 2426839050..2205e0e23a 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -36,6 +36,18 @@ typedef void* paddle_gradient_machine; PD_API paddle_error paddle_gradient_machine_create_for_inference( paddle_gradient_machine* machine, void* modelConfigProtobuf, int size); +/** + * @brief Create a gradient machine used for model inference, using config with + * parameters which is generated by `paddle merge_model`. + * @param [out] machine that used for model inference. + * @param [in] mergedModel + * @param [in] size + * @return paddle_error + */ +PD_API paddle_error +paddle_gradient_machine_create_for_inference_with_parameters( + paddle_gradient_machine* machine, void* mergedModel, uint64_t size); + /** * @brief Load parameter from disk. * @param machine Gradient Machine. diff --git a/paddle/gserver/gradientmachines/NeuralNetwork.cpp b/paddle/gserver/gradientmachines/NeuralNetwork.cpp index 148296d20b..cfa80a8936 100644 --- a/paddle/gserver/gradientmachines/NeuralNetwork.cpp +++ b/paddle/gserver/gradientmachines/NeuralNetwork.cpp @@ -24,8 +24,6 @@ limitations under the License. 
*/ #include "paddle/gserver/layers/AgentLayer.h" #include "paddle/utils/Stat.h" -#include - namespace paddle { void parameterInitNN(int paramId, Parameter* para, From b49a1644ab9c04af301044cfcdfd0c90b8deaebb Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 19:46:32 +0800 Subject: [PATCH 48/55] add soft links to gcc4.8 version --- Dockerfile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Dockerfile b/Dockerfile index 0d0c88f40c..f9beb1b25d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,16 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ + ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ && \ automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ From 4f1f7e90aa170aef91ac2d60bdc89860f6933dd6 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 10 Aug 2017 11:51:31 +0000 Subject: [PATCH 49/55] Delete c-api interface, paddle_gradient_machine_load_parameter_from_buffer, and related codes in Paddle core. 
--- paddle/capi/gradient_machine.cpp | 9 ---- paddle/capi/gradient_machine.h | 9 ---- .../gradientmachines/GradientMachine.cpp | 43 ------------------- .../gradientmachines/GradientMachine.h | 2 - paddle/parameter/Parameter.cpp | 40 ++++++++--------- paddle/parameter/Parameter.h | 5 --- 6 files changed, 18 insertions(+), 90 deletions(-) diff --git a/paddle/capi/gradient_machine.cpp b/paddle/capi/gradient_machine.cpp index f7ad30f3bf..b3287552db 100644 --- a/paddle/capi/gradient_machine.cpp +++ b/paddle/capi/gradient_machine.cpp @@ -93,15 +93,6 @@ paddle_error paddle_gradient_machine_load_parameter_from_disk( return kPD_NO_ERROR; } -paddle_error paddle_gradient_machine_load_parameter_from_buffer( - paddle_gradient_machine machine, const char* buf, uint64_t length) { - auto m = cast(machine); - if (m == nullptr || buf == nullptr || m->machine == nullptr) - return kPD_NULLPTR; - m->machine->loadParameters(buf, length); - return kPD_NO_ERROR; -} - paddle_error paddle_gradient_machine_forward(paddle_gradient_machine machine, paddle_arguments inArgs, paddle_arguments outArgs, diff --git a/paddle/capi/gradient_machine.h b/paddle/capi/gradient_machine.h index 2205e0e23a..c613ade5b2 100644 --- a/paddle/capi/gradient_machine.h +++ b/paddle/capi/gradient_machine.h @@ -57,15 +57,6 @@ paddle_gradient_machine_create_for_inference_with_parameters( PD_API paddle_error paddle_gradient_machine_load_parameter_from_disk( paddle_gradient_machine machine, const char* path); -/** - * @brief Load parameter from buffer. - * @param machine Gradient Machine. - * @param buffer containing all parameters. 
- * @return paddle_error - */ -PD_API paddle_error paddle_gradient_machine_load_parameter_from_buffer( - paddle_gradient_machine machine, const char* buf, uint64_t length); - /** * @brief Forward a gradient machine * @param machine Gradient machine diff --git a/paddle/gserver/gradientmachines/GradientMachine.cpp b/paddle/gserver/gradientmachines/GradientMachine.cpp index b7678d9b2f..b44e4dc202 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.cpp +++ b/paddle/gserver/gradientmachines/GradientMachine.cpp @@ -14,7 +14,6 @@ limitations under the License. */ #include "GradientMachine.h" -#include #include #include "paddle/utils/Logging.h" @@ -82,48 +81,6 @@ void GradientMachine::loadParameters(const std::string& dir) { } } -void GradientMachine::loadParameters(const char* buf, uint64_t length) { - LOG(INFO) << "Loading parameter from pre-load buffer"; - - CHECK_NOTNULL(buf); - CHECK_GE(length, static_cast(sizeof(uint64_t))); - - uint64_t numFiles = 0; - memcpy(&numFiles, buf, sizeof(uint64_t)); - uint64_t position = sizeof(uint64_t); - LOG(INFO) << "numFiles: " << numFiles << ", position: " << position; - - std::map offsets; - std::map lengths; - for (uint64_t i = 0; i < numFiles; i++) { - std::string filename(buf + position); - position += filename.size() + 1; - LOG(INFO) << "filename: " << filename << ", position: " << position; - uint64_t size = 0; - memcpy(&size, buf + position, sizeof(uint64_t)); - position += sizeof(uint64_t); - offsets[filename] = const_cast(buf + position); - lengths[filename] = size; - position += size; - CHECK_GE(length, position); - } - - CHECK_GE(offsets.size(), parameters_.size()); - - for (auto& para : parameters_) { - std::string filename = para->getName(); - if (para->isFullSize()) { - if (offsets.end() == offsets.find(filename)) { - para->loadMiss(filename); - } else { - std::istringstream stream( - std::string(offsets[filename], lengths[filename])); - para->load(stream); - } - } - } -} - void 
GradientMachine::randParameters() { LOG(INFO) << "Initing parameters.."; diff --git a/paddle/gserver/gradientmachines/GradientMachine.h b/paddle/gserver/gradientmachines/GradientMachine.h index 081518a9d2..f9c82a2bef 100644 --- a/paddle/gserver/gradientmachines/GradientMachine.h +++ b/paddle/gserver/gradientmachines/GradientMachine.h @@ -221,8 +221,6 @@ public: void loadParameters(const std::string& dir); - void loadParameters(const char* buf, uint64_t length); - void randParameters(); virtual void getStats(real& cost, int64_t& numProcessed) { diff --git a/paddle/parameter/Parameter.cpp b/paddle/parameter/Parameter.cpp index 80dbb73a7d..ebe36d4937 100644 --- a/paddle/parameter/Parameter.cpp +++ b/paddle/parameter/Parameter.cpp @@ -314,31 +314,27 @@ bool Parameter::save(std::ostream& s) const { /** * Load parameter value from a file */ -bool Parameter::loadMiss(const std::string& filename) { - LOG(INFO) << "missing parameters [" << filename << "] while loading model."; - if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { - LOG(FATAL) << getName() << " missing, not allowed."; - return false; - } - if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to random."; - randomize(); - return true; - } - if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { - LOG(INFO) << getName() << " missing, set to zero."; - zeroMem(); - return true; - } - LOG(FATAL) << "unsupported load_missing_parameter_strategy: " - << FLAGS_load_missing_parameter_strategy; - return false; -} - bool Parameter::load(const std::string& filename) { std::ifstream fs(filename, std::ios_base::binary); if (!fs) { - loadMiss(filename); + LOG(INFO) << "missing parameters [" << filename << "] while loading model."; + if (kMissParameterFail == FLAGS_load_missing_parameter_strategy) { + LOG(FATAL) << getName() << " missing, not allowed."; + return false; + } + if (kMissParameterRand == FLAGS_load_missing_parameter_strategy) { + 
LOG(INFO) << getName() << " missing, set to random."; + randomize(); + return true; + } + if (kMissParameterZero == FLAGS_load_missing_parameter_strategy) { + LOG(INFO) << getName() << " missing, set to zero."; + zeroMem(); + return true; + } + LOG(FATAL) << "unsupported load_missing_parameter_strategy: " + << FLAGS_load_missing_parameter_strategy; + return false; } return load(fs); } diff --git a/paddle/parameter/Parameter.h b/paddle/parameter/Parameter.h index 21932f6b6e..0bac76f068 100644 --- a/paddle/parameter/Parameter.h +++ b/paddle/parameter/Parameter.h @@ -201,11 +201,6 @@ public: */ bool save(std::ostream& s) const; - /** - * Fill parameter when file is missed - */ - bool loadMiss(const std::string& filename); - /** * Load parameter value from a file */ From ed4ea674671214d7decbb14d04430a1b0a331897 Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:01:22 +0800 Subject: [PATCH 50/55] refine dockerfile --- Dockerfile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index f9beb1b25d..73310a2ec0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,7 +28,13 @@ RUN apt-get update && \ wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \ curl sed grep graphviz libjpeg-dev zlib1g-dev \ python-matplotlib gcc-4.8 g++-4.8 \ - ln -sf gcc-4.8 /usr/bin/gcc && \ + automake locales clang-format swig doxygen cmake \ + liblapack-dev liblapacke-dev libboost-dev \ + clang-3.8 llvm-3.8 libclang-3.8-dev \ + net-tools && \ + apt-get clean -y + +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ @@ -37,12 +43,7 @@ RUN apt-get update && \ ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ ln -sf g++-4.8 /usr/bin/g++ && \ - ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ && \ - automake locales clang-format swig doxygen cmake \ - 
liblapack-dev liblapacke-dev libboost-dev \ - clang-3.8 llvm-3.8 libclang-3.8-dev \ - net-tools && \ - apt-get clean -y + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ # paddle is using numpy.flip, which is introduced since 1.12.0 RUN pip --no-cache-dir install 'numpy>=1.12.0' From f48e2fafb47262112a1243d03babbb8b8a476de8 Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:31:30 +0800 Subject: [PATCH 51/55] fix pip install error --- Dockerfile | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Dockerfile b/Dockerfile index 73310a2ec0..c9bda6c2f7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,17 +34,6 @@ RUN apt-get update && \ net-tools && \ apt-get clean -y -RUN ln -sf gcc-4.8 /usr/bin/gcc && \ - ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ - ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ - ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ - ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ - ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ - ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ - ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ - ln -sf g++-4.8 /usr/bin/g++ && \ - ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ - # paddle is using numpy.flip, which is introduced since 1.12.0 RUN pip --no-cache-dir install 'numpy>=1.12.0' @@ -82,6 +71,18 @@ RUN pip install --upgrade pip && \ RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] +# ln -sf to gcc4.8 +RUN ln -sf gcc-4.8 /usr/bin/gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/gcc-ranlib && \ + ln -sf gcc-4.8 /usr/bin/x86_64-linux-gnu-gcc && \ + ln -sf gcc-ar-4.8 /usr/bin/x86_64-linux-gnu-gcc-ar && \ + ln -sf gcc-nm-4.8 /usr/bin/x86_64-linux-gnu-gcc-nm && \ + ln -sf gcc-ranlib-4.8 /usr/bin/x86_64-linux-gnu-gcc-ranlib && \ + ln -sf g++-4.8 /usr/bin/g++ && \ + ln -sf g++-4.8 /usr/bin/x86_64-linux-gnu-g++ + # Install woboq_codebrowser to /woboq 
RUN git clone https://github.com/woboq/woboq_codebrowser /woboq && \ (cd /woboq \ From 554e08ac7ad4c9dc09e68ad7723092b2447c011f Mon Sep 17 00:00:00 2001 From: qijun Date: Thu, 10 Aug 2017 20:56:29 +0800 Subject: [PATCH 52/55] fix build error: 'type name' declared as function returning a function --- paddle/operators/gaussian_random_op.cu | 3 ++- paddle/operators/uniform_random_op.cu | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/paddle/operators/gaussian_random_op.cu b/paddle/operators/gaussian_random_op.cu index 3f6824ebd0..1340b1e1e9 100644 --- a/paddle/operators/gaussian_random_op.cu +++ b/paddle/operators/gaussian_random_op.cu @@ -33,7 +33,8 @@ class GaussianRandomKernel : public framework::OpKernel { int seed = context.op_.GetAttr("seed"); if (seed == 0) { - seed = std::random_device()(); + std::random_device rd; + seed = rd(); } curandGenerator_t g; PADDLE_ENFORCE(platform::dynload::curandCreateGenerator( diff --git a/paddle/operators/uniform_random_op.cu b/paddle/operators/uniform_random_op.cu index f1a63e52ec..b35ebe7b63 100644 --- a/paddle/operators/uniform_random_op.cu +++ b/paddle/operators/uniform_random_op.cu @@ -51,7 +51,8 @@ class GPUUniformRandomKernel : public framework::OpKernel { unsigned int seed = static_cast(context.op_.GetAttr("seed")); if (seed == 0) { - seed = std::random_device()(); + std::random_device rd; + seed = rd(); } T min = static_cast(context.op_.GetAttr("min")); T max = static_cast(context.op_.GetAttr("max")); From 55ce5a82d4942a693f0a54a5b41a2e7b4dc02ab0 Mon Sep 17 00:00:00 2001 From: Helin Wang Date: Thu, 10 Aug 2017 17:04:15 -0700 Subject: [PATCH 53/55] Docker build: create deb file and build Go by default. Otherwise the production image generation will fail due to deb not found, or executable built from Go not found. 
--- paddle/scripts/docker/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index f70583c641..2f0205b770 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,7 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} - -DWITH_GOLANG=${WITH_GOLANG:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-ON} -DWITH_SWIG_PY=ON -DWITH_C_API=${WITH_C_API:-OFF} -DWITH_PYTHON=${WITH_PYTHON:-ON} @@ -51,7 +51,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ - -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-ON} \ -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \ -DWITH_C_API=${WITH_C_API:-OFF} \ -DWITH_PYTHON=${WITH_PYTHON:-ON} \ @@ -130,7 +130,7 @@ fi # generate deb package for current build # FIXME(typhoonzero): should we remove paddle/scripts/deb ? -if [[ ${WITH_DEB:-OFF} == "ON" ]]; then +if [[ ${WITH_DEB:-ON} == "ON" ]]; then cat < Date: Fri, 11 Aug 2017 10:38:43 +0800 Subject: [PATCH 54/55] add TODO comment --- Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index c9bda6c2f7..41b6729124 100644 --- a/Dockerfile +++ b/Dockerfile @@ -71,7 +71,10 @@ RUN pip install --upgrade pip && \ RUN apt-get install -y libssl-dev libffi-dev RUN pip install certifi urllib3[secure] -# ln -sf to gcc4.8 +# TODO(qijun) The template library Eigen doesn't work well with GCC 5 +# coming with the default Docker image, so we switch to use GCC 4.8 +# by default. And I will check Eigen library later. 
+ RUN ln -sf gcc-4.8 /usr/bin/gcc && \ ln -sf gcc-ar-4.8 /usr/bin/gcc-ar && \ ln -sf gcc-nm-4.8 /usr/bin/gcc-nm && \ From 886e66a5ff8920d612023e3eb3091bbb1d5d21dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AD=A6=E6=AF=85?= Date: Fri, 11 Aug 2017 14:35:33 +0800 Subject: [PATCH 55/55] golang pserver use OptimizerConfig.proto (#3358) * golang pserver optimizer config for user * update * update * update * update * update by comments * fix errors * fix errors --- go/pserver/client/c/test/test_train.py | 6 +- paddle/api/ParameterUpdater.cpp | 2 +- paddle/trainer/NewRemoteParameterUpdater.cpp | 98 ++++++++++++++++---- python/paddle/v2/optimizer.py | 24 ++++- python/paddle/v2/parameters.py | 14 +++ 5 files changed, 117 insertions(+), 27 deletions(-) diff --git a/go/pserver/client/c/test/test_train.py b/go/pserver/client/c/test/test_train.py index 572a61e4cc..8d9c6b9b20 100644 --- a/go/pserver/client/c/test/test_train.py +++ b/go/pserver/client/c/test/test_train.py @@ -17,12 +17,10 @@ def main(): # network config x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(13)) y_predict = paddle.layer.fc(input=x, - param_attr=paddle.attr.Param( - name='w', learning_rate=1e-3), + param_attr=paddle.attr.Param(name='w'), size=1, act=paddle.activation.Linear(), - bias_attr=paddle.attr.Param( - name='b', learning_rate=1e-3)) + bias_attr=paddle.attr.Param(name='b')) y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1)) cost = paddle.layer.mse_cost(input=y_predict, label=y) diff --git a/paddle/api/ParameterUpdater.cpp b/paddle/api/ParameterUpdater.cpp index 5934cb898b..8cd73b348c 100644 --- a/paddle/api/ParameterUpdater.cpp +++ b/paddle/api/ParameterUpdater.cpp @@ -41,7 +41,7 @@ ParameterUpdater *ParameterUpdater::createNewRemoteUpdater( config->m->getConfig(), pserverSpec, useEtcd)); return updater; #else - throw UnsupportError(); + throw UnsupportError("not compiled with WITH_GOLANG"); #endif } diff --git a/paddle/trainer/NewRemoteParameterUpdater.cpp 
b/paddle/trainer/NewRemoteParameterUpdater.cpp index af1dceed02..cccb7e7cdd 100644 --- a/paddle/trainer/NewRemoteParameterUpdater.cpp +++ b/paddle/trainer/NewRemoteParameterUpdater.cpp @@ -66,28 +66,92 @@ void NewRemoteParameterUpdater::init( // from parameter server if (paddle_begin_init_params(parameterClient_)) { LOG(INFO) << "paddle_begin_init_params start"; + // NOTE: convert V1 OptimizationConfig proto to V2 OptimizerConfig. + // This makes golang pserver compatible with handy V1 demos. + // TODO: Refine or remove these ugly converting lines + OptimizerConfig optimizerConfigV2; + if (trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } else if (trainerConfig_.learning_method() == "adagrad") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adagrad); + optimizerConfigV2.mutable_adagrad()->set_epsilon( + trainerConfig_.ada_epsilon()); + } else if (trainerConfig_.learning_method() == "adadelta") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adadelta); + optimizerConfigV2.mutable_adadelta()->set_epsilon( + trainerConfig_.ada_epsilon()); + optimizerConfigV2.mutable_adadelta()->set_rho(trainerConfig_.ada_rou()); + } else if (trainerConfig_.learning_method() == "adam") { + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::Adam); + optimizerConfigV2.mutable_adam()->set_beta_1(trainerConfig_.adam_beta1()); + optimizerConfigV2.mutable_adam()->set_beta_2(trainerConfig_.adam_beta2()); + optimizerConfigV2.mutable_adam()->set_epsilon( + trainerConfig_.adam_epsilon()); + } else { + LOG(ERROR) << "got unsupported v1 optimizer config: " + << trainerConfig_.learning_method(); + optimizerConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + } + + if (trainerConfig_.learning_rate_schedule() == "constant") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + } else if
(trainerConfig_.learning_rate_schedule() == "linear") { + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Linear); + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + trainerConfig_.learning_rate()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_a( + trainerConfig_.learning_rate_decay_a()); + optimizerConfigV2.mutable_linear_lr()->set_lr_decay_b( + trainerConfig_.learning_rate_decay_b()); + } else { + LOG(ERROR) << "got unsupported v1 learning_rate_schedule config: " + << trainerConfig_.learning_rate_schedule() << ", set to const"; + optimizerConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); + } + + // per-parameter(layer) overrides; NOTE: config is shared across the loop for (int i = 0; i < parameterSize(); ++i) { auto paramConfig = parameters_[i]->getConfig(); - LOG(INFO) << "old param config: " << paramConfig.DebugString(); - // FIXME(typhoonzero): convert old paramConfig to optimizerConfig - OptimizerConfig optimizeConfigV2; - auto sgdConfigV2 = optimizeConfigV2.mutable_sgd(); - sgdConfigV2->set_momentum(paramConfig.momentum()); - sgdConfigV2->set_decay(paramConfig.decay_rate()); - optimizeConfigV2.set_lr_policy(paddle::OptimizerConfig::Const); - auto constlr = optimizeConfigV2.mutable_const_lr(); + if (paramConfig.has_momentum() && + trainerConfig_.learning_method() == "momentum") { + optimizerConfigV2.mutable_sgd()->set_momentum(paramConfig.momentum()); + } if (paramConfig.has_learning_rate()) { - constlr->set_learning_rate(paramConfig.learning_rate()); - } else { - constlr->set_learning_rate(trainerConfig_.learning_rate()); + switch (optimizerConfigV2.lr_policy()) { + case paddle::OptimizerConfig::Const: + optimizerConfigV2.mutable_const_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + case paddle::OptimizerConfig::Linear: + optimizerConfigV2.mutable_linear_lr()->set_learning_rate( + paramConfig.learning_rate()); + break; + } } - if (trainerConfig_.algorithm() == "sgd") { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); - // FIXME: config all algorithms - }
else { - optimizeConfigV2.set_optimizer(paddle::OptimizerConfig::SGD); + if (paramConfig.has_decay_rate()) { + switch (optimizerConfigV2.optimizer()) { + case paddle::OptimizerConfig::SGD: + optimizerConfigV2.mutable_sgd()->set_decay( + paramConfig.decay_rate()); + break; + case paddle::OptimizerConfig::Adadelta: + optimizerConfigV2.mutable_adadelta()->set_decay( + paramConfig.decay_rate()); + break; + case paddle::OptimizerConfig::Adagrad: + optimizerConfigV2.mutable_adagrad()->set_decay( + paramConfig.decay_rate()); + break; + case paddle::OptimizerConfig::Adam: + optimizerConfigV2.mutable_adam()->set_decay( + paramConfig.decay_rate()); + break; + } } - std::string bytes = optimizeConfigV2.SerializeAsString(); + // send param and config to pserver + std::string bytes = optimizerConfigV2.SerializeAsString(); const char *array = bytes.data(); int size = (int)bytes.size(); paddle_init_param( diff --git a/python/paddle/v2/optimizer.py b/python/paddle/v2/optimizer.py index ba58198033..29f0945eb4 100644 --- a/python/paddle/v2/optimizer.py +++ b/python/paddle/v2/optimizer.py @@ -1,13 +1,26 @@ -import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils -import paddle.trainer_config_helpers.optimizers as v1_optimizers +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """ Optimizers(update equation) for SGD method. -TODO(zhihong) : create new optimizer with proto config, add new optimizer here - TODO(yuyang18): Complete comments.
""" +import paddle.trainer_config_helpers.config_parser_utils as config_parser_utils +import paddle.trainer_config_helpers.optimizers as v1_optimizers +from paddle.proto.OptimizerConfig_pb2 import OptimizerConfig + __all__ = [ 'Momentum', 'Adam', 'Adamax', 'AdaGrad', 'DecayedAdaGrad', 'AdaDelta', 'RMSProp', 'ModelAverage', 'L2Regularization' @@ -70,7 +83,8 @@ class Optimizer(object): gradient_machine.prefetch(in_args) parameter_updater.getParametersRemote() - :param pserver_spec: pserver location, eg: localhost:3000 + :param pserver_spec: pserver location, eg: localhost:3000, if use etcd, + pserver_spec should be the etcd endpoints, eg: http://localhost:2379 :return: parameter_updater """ if is_local: diff --git a/python/paddle/v2/parameters.py b/python/paddle/v2/parameters.py index a9cba8ca0b..364306d674 100644 --- a/python/paddle/v2/parameters.py +++ b/python/paddle/v2/parameters.py @@ -1,3 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import numpy as np from paddle.proto.ParameterConfig_pb2 import ParameterConfig import paddle.trainer.config_parser as cp