@@ -14,7 +14,9 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/lrn_op.h"
 #include <string>
+#include <vector>
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
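(The two added headers back the changes below: <vector> for the new std::vector shape and axis locals, and math_function.h for the math::Transpose functor the layout handling relies on.)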
@@ -23,18 +25,41 @@ namespace paddle {
 namespace operators {
 
 using framework::Tensor;
+using DataLayout = framework::DataLayout;
 
 template <typename T>
 struct LRNFunctor<platform::CPUDeviceContext, T> {
   void operator()(const framework::ExecutionContext& ctx,
                   const framework::Tensor& input, framework::Tensor* out,
                   framework::Tensor* mid, int N, int C, int H, int W, int n,
-                  T k, T alpha, T beta) {
-    const T* idata = input.data<T>();
+                  T k, T alpha, T beta, const DataLayout data_layout) {
     auto place = ctx.GetPlace();
     auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
-    T* odata = out->mutable_data<T>(place);
-    T* mdata = mid->mutable_data<T>(place);
+    math::Transpose<platform::CPUDeviceContext, T, 4> transpose;
+    auto& dev_ctx = ctx.template device_context<platform::CPUDeviceContext>();
+    Tensor in_transpose, mid_transpose, out_transpose;
+    // if channel_last, transpose to channel_first
+    if (data_layout == DataLayout::kNHWC) {
+      auto in_dims = input.dims();
+      std::vector<int64_t> shape(
+          {in_dims[0], in_dims[3], in_dims[1], in_dims[2]});
+      in_transpose.mutable_data<T>(framework::make_ddim(shape), place);
+      mid_transpose.mutable_data<T>(framework::make_ddim(shape), place);
+      out_transpose.mutable_data<T>(framework::make_ddim(shape), place);
+      std::vector<int> axis = {0, 3, 1, 2};
+      transpose(dev_ctx, input, &in_transpose, axis);
+    } else {
+      in_transpose = input;
+      mid_transpose = *mid;
+      out_transpose = *out;
+      mid_transpose.mutable_data<T>(mid->dims(), place);
+      out_transpose.mutable_data<T>(out->dims(), place);
+    }
+
+    const T* idata = in_transpose.data<T>();
+    T* odata = out_transpose.data<T>();
+    T* mdata = mid_transpose.data<T>();
 
     Tensor squared;
     T* sdata = squared.mutable_data<T>({1, C + n - 1, H, W}, place);
     std::memset(sdata, 0, sizeof(T) * squared.numel());
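The forward pass now accepts channel-last input by transposing it to channel-first before the windowed square-sum, writing results into NCHW-shaped scratch tensors. As a standalone illustration of what the axis = {0, 3, 1, 2} permutation does, here is a minimal plain-C++ sketch (a hypothetical nhwc_to_nchw helper with no Paddle dependencies, not the code the functor runs):

    #include <vector>

    // Sketch: permute a contiguous NHWC buffer into NCHW order, i.e. the
    // axis = {0, 3, 1, 2} transpose the functor performs via math::Transpose.
    // Hypothetical helper, for illustration only.
    std::vector<float> nhwc_to_nchw(const std::vector<float>& in, int N, int H,
                                    int W, int C) {
      std::vector<float> out(in.size());
      for (int n = 0; n < N; ++n)
        for (int h = 0; h < H; ++h)
          for (int w = 0; w < W; ++w)
            for (int c = 0; c < C; ++c)
              // source index in NHWC layout, destination index in NCHW layout
              out[((n * C + c) * H + h) * W + w] =
                  in[((n * H + h) * W + w) * C + c];
      return out;
    }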
@@ -67,6 +92,13 @@ struct LRNFunctor<platform::CPUDeviceContext, T> {
     // compute the final output
     blas.VPOW(mid->numel(), mdata, -beta, odata);
     blas.VMUL(mid->numel(), odata, idata, odata);
+
+    // if channel_last, transpose the output(NCHW) to channel_last
+    if (data_layout == DataLayout::kNHWC) {
+      std::vector<int> axis = {0, 2, 3, 1};
+      transpose(dev_ctx, mid_transpose, mid, axis);
+      transpose(dev_ctx, out_transpose, out, axis);
+    }
   }
 };
 template struct LRNFunctor<platform::CPUDeviceContext, float>;
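After the element-wise VPOW/VMUL, the results still live in the NCHW scratch tensors; axis = {0, 2, 3, 1} is the inverse of the {0, 3, 1, 2} permutation used on the way in, so mid and out come back in the caller's NHWC layout. A quick check of that inverse relationship (a sketch, not Paddle code):

    #include <array>
    #include <cassert>

    int main() {
      // forward permutation applied to the input, and the one applied to results
      std::array<int, 4> fwd = {0, 3, 1, 2};  // NHWC -> NCHW
      std::array<int, 4> bwd = {0, 2, 3, 1};  // NCHW -> NHWC
      // composing them must give the identity: bwd undoes fwd
      for (int axis = 0; axis < 4; ++axis) assert(fwd[bwd[axis]] == axis);
    }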
@@ -78,7 +110,7 @@ struct LRNGradFunctor<platform::CPUDeviceContext, T> {
                   const framework::Tensor& x, const framework::Tensor& out,
                   const framework::Tensor& mid, framework::Tensor* x_g,
                   const framework::Tensor& out_g, int N, int C, int H, int W,
-                  int n, T alpha, T beta) {
+                  int n, T alpha, T beta, const DataLayout data_layout) {
     T ratio = -2 * alpha * beta;
     auto x_g_e = framework::EigenVector<T>::Flatten(*x_g);
     x_g_e = x_g_e.constant(0.0);
@@ -93,17 +125,17 @@ struct LRNGradFunctor<platform::CPUDeviceContext, T> {
     const int end = start + n;
     for (int m = 0; m < N; m++) {
       for (int i = 0; i < C; i++) {
-        auto i_x = e_x.slice(Eigen::array<int, 4>({{m, i, 0, 0}}),
-                             Eigen::array<int, 4>({{1, 1, H, W}}));
-
-        auto i_x_g = e_x_g.slice(Eigen::array<int, 4>({{m, i, 0, 0}}),
-                                 Eigen::array<int, 4>({{1, 1, H, W}}));
-
-        auto i_out_g = e_out_g.slice(Eigen::array<int, 4>({{m, i, 0, 0}}),
-                                     Eigen::array<int, 4>({{1, 1, H, W}}));
+        auto offsets = Eigen::array<int, 4>({{m, i, 0, 0}});
+        auto extents = Eigen::array<int, 4>({{1, 1, H, W}});
+        if (data_layout == DataLayout::kNHWC) {
+          offsets = Eigen::array<int, 4>({{m, 0, 0, i}});
+          extents = Eigen::array<int, 4>({{1, H, W, 1}});
+        }
 
-        auto i_mid = e_mid.slice(Eigen::array<int, 4>({{m, i, 0, 0}}),
-                                 Eigen::array<int, 4>({{1, 1, H, W}}));
+        auto i_x = e_x.slice(offsets, extents);
+        auto i_x_g = e_x_g.slice(offsets, extents);
+        auto i_out_g = e_out_g.slice(offsets, extents);
+        auto i_mid = e_mid.slice(offsets, extents);
 
         i_x_g = i_mid.pow(-beta) * i_out_g;
         for (int c = start; c < end; c++) {
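The gradient loop now keeps a single offsets/extents pair and moves the channel index between axis 1 (NCHW) and axis 3 (NHWC), so the same Eigen slice expressions serve both layouts. In flat-index terms, the per-channel slice starting at those offsets addresses elements as follows (an illustrative, hypothetical helper; not part of the diff):

    // Flat offset of element (m, i, h, w) within channel slice i of image m,
    // for both layouts the functor supports. Illustrative only.
    int channel_slice_offset(int m, int i, int h, int w, int C, int H, int W,
                             bool nhwc) {
      if (!nhwc) {
        // NCHW: slice starts at offsets {m, i, 0, 0}, extents {1, 1, H, W}
        return ((m * C + i) * H + h) * W + w;
      }
      // NHWC: slice starts at offsets {m, 0, 0, i}, extents {1, H, W, 1}
      return ((m * H + h) * W + w) * C + i;
    }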
@@ -112,14 +144,14 @@ struct LRNGradFunctor<platform::CPUDeviceContext, T> {
             continue;
           }
 
-          auto c_out = e_out.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}),
-                                   Eigen::array<int, 4>({{1, 1, H, W}}));
-
-          auto c_mid = e_mid.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}),
-                                   Eigen::array<int, 4>({{1, 1, H, W}}));
-
-          auto c_out_g = e_out_g.slice(Eigen::array<int, 4>({{m, ch, 0, 0}}),
-                                       Eigen::array<int, 4>({{1, 1, H, W}}));
+          if (data_layout != DataLayout::kNHWC) {
+            offsets = Eigen::array<int, 4>({{m, ch, 0, 0}});
+          } else {
+            offsets = Eigen::array<int, 4>({{m, 0, 0, ch}});
+          }
+          auto c_out = e_out.slice(offsets, extents);
+          auto c_mid = e_mid.slice(offsets, extents);
+          auto c_out_g = e_out_g.slice(offsets, extents);
 
           i_x_g += ratio * c_out_g * c_out * i_x / c_mid;
         }
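Both accumulation lines implement the derivative of out = x * mid^(-beta) with mid = k + alpha * sum(x^2) over the channel window; with ratio = -2 * alpha * beta as defined above, the gradient being assembled for each channel j is:

$$
x\_g_j = mid_j^{-\beta} \cdot out\_g_j \; + \; ratio \cdot \sum_{i \,:\, j \in window(i)} \frac{out\_g_i \cdot out_i \cdot x_j}{mid_i},
\qquad ratio = -2\alpha\beta
$$

The first term is the i_x_g = i_mid.pow(-beta) * i_out_g line; each summand is one i_x_g += ratio * c_out_g * c_out * i_x / c_mid update, and the layout branch only changes where the slices live, not the arithmetic.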
@@ -156,9 +188,8 @@ class LRNOp : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     framework::LibraryType library_{framework::LibraryType::kPlain};
-    std::string data_format = ctx.Attr<std::string>("data_format");
     // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
-    framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+    framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
 #ifdef PADDLE_WITH_MKLDNN
     if (library_ == framework::LibraryType::kPlain &&
         platform::CanMKLDNNBeUsed(ctx)) {
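With the layout conversion handled inside the functors, kernel selection no longer consults the data_format attribute; the op now reports kAnyLayout. A condensed sketch of the resulting decision, using simplified stand-in types rather than the actual Paddle API, and assuming the MKLDNN branch elided above switches both layout and library to MKLDNN:

    // Simplified sketch of the kernel-type decision after this change:
    // layout is fixed to kAnyLayout unless MKLDNN is available and usable.
    enum class DataLayout { kAnyLayout, kMKLDNN };
    enum class LibraryType { kPlain, kMKLDNN };

    struct KernelChoice {
      DataLayout layout;
      LibraryType library;
    };

    KernelChoice ChooseLRNKernel(bool mkldnn_can_be_used) {
      KernelChoice choice{DataLayout::kAnyLayout, LibraryType::kPlain};
      if (mkldnn_can_be_used) {  // stands in for platform::CanMKLDNNBeUsed(ctx)
        choice = {DataLayout::kMKLDNN, LibraryType::kMKLDNN};
      }
      return choice;
    }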
@@ -242,8 +273,8 @@ $$
 
 Function implementation:
 
-Inputs and outpus are in NCHW format, while input.shape.ndims() equals 4.
-And dimensions 0 ~ 3 represent batch size, feature maps, rows,
+Inputs and outputs are in NCHW or NHWC format, while input.shape.ndims() equals 4.
+If NCHW, the dimensions 0 ~ 3 represent batch size, feature maps, rows,
 and columns, respectively.
 
 Input and Output in the formula above is for each map(i) of one image, and
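As a concrete instance of the revised wording: a batch of 2 images with 8 feature maps of 32x32 pixels is shaped [2, 8, 32, 32] in NCHW but [2, 32, 32, 8] in NHWC; only the position of the channel axis differs, which is exactly what the transposes in the functors compensate for.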
@@ -275,9 +306,8 @@ class LRNOpGrad : public framework::OperatorWithKernel {
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
     framework::LibraryType library_{framework::LibraryType::kPlain};
-    std::string data_format = ctx.Attr<std::string>("data_format");
     // TODO(pzelazko-intel): enable MKLDNN layout when it's ready
-    framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
+    framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
 #ifdef PADDLE_WITH_MKLDNN
     if (library_ == framework::LibraryType::kPlain &&
         platform::CanMKLDNNBeUsed(ctx)) {