Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev_refine_bilinear_interp

revert-11610-move_hooks
fengjiayi 7 years ago
commit 8567d0424d

@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
return train_program, fluid.default_startup_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
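For readers skimming the diff: the rename only touches the variable name, not the dispatch logic. Below is a minimal sketch of that logic, assuming the benchmark script branches on the renamed variable (the helper name here is hypothetical):

```python
import os

def get_training_role():
    # Hypothetical helper mirroring the dispatch around this hunk.
    role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER")
    if role not in ("TRAINER", "PSERVER"):
        raise ValueError(
            'PADDLE_TRAINING_ROLE environment variable must be either '
            'TRAINER or PSERVER')
    return role

print(get_training_role())  # defaults to TRAINER when the variable is unset
```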

@ -108,10 +108,10 @@ def gen_job():
tn_container["ports"][0]["containerPort"] = spreadport
envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
envs.append({"name": "TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
envs.append({"name": "PSERVERS", "value": str(args.pservers)})
envs.append({"name": "ENTRY", "value": args.entry})
envs.append({"name": "PADDLE_INIT_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
# NOTE: these directories below are cluster specific, please modify
# these settings before you run on your own cluster.
@ -167,16 +167,22 @@ def gen_job():
tn_container["volumeMounts"] = volumeMounts
ps_container["env"] = envs
ps_container["env"].append({"name": "TRAINING_ROLE", "value": "PSERVER"})
ps_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "PSERVER"
})
tn_container["env"] = envs
if args.disttype == "pserver":
tn_container["env"].append({
"name": "TRAINING_ROLE",
"name": "PADDLE_TRAINING_ROLE",
"value": "TRAINER"
})
elif args.disttype == "nccl2" or args.disttype == "local":
# NCCL2 has no training role, so set it to a plain WORKER
tn_container["env"].append({"name": "TRAINING_ROLE", "value": "WORKER"})
tn_container["env"].append({
"name": "PADDLE_TRAINING_ROLE",
"value": "WORKER"
})
os.mkdir(args.jobname)
if args.disttype == "pserver":
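Putting the renamed variables together, here is a hedged sketch of the env list a trainer pod ends up with after this hunk; the values are placeholders for what gen_job reads from args:

```python
# Placeholder values; in gen_job these come from the parsed args.
trainer_env = [
    {"name": "PADDLE_JOB_NAME", "value": "myjob"},
    {"name": "PADDLE_TRAINERS", "value": "2"},
    {"name": "PSERVERS", "value": "1"},
    {"name": "ENTRY", "value": "python train.py"},
    {"name": "PADDLE_PSERVER_PORT", "value": "6174"},
    {"name": "PADDLE_TRAINING_ROLE", "value": "TRAINER"},  # PSERVER/WORKER for other pods
]
```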

@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE()
MESSAGE(FATAL_ERROR "Should enable MKLML when building MKLDNN")
ENDIF()
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result")
SET(MKLDNN_FLAG "-Wno-error=strict-overflow -Wno-error=unused-result")
SET(MKLDNN_FLAG "${MKLDNN_FLAG} -Wno-unused-result -Wno-unused-value")
SET(MKLDNN_CFLAG "${CMAKE_C_FLAGS} ${MKLDNN_FLAG}")
SET(MKLDNN_CXXFLAG "${CMAKE_CXX_FLAGS} ${MKLDNN_FLAG}")
ExternalProject_Add(

@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
Step 2: start the Parameter Server
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.2 TRAINERS=2 POD_IP=192.168.1.2 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=PSERVER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.2 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.2 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=PSERVER python test_fit_a_line.py
```
After running the command, wait for the prompt ```Server listening on 192.168.1.2:6174 ```, which indicates that the Parameter Server has started successfully.
Step 3: start the Trainer
```bash
PADDLE_INIT_PORT=6174 PADDLE_INIT_PSERVERS=192.168.1.3 TRAINERS=2 POD_IP=192.168.1.3 PADDLE_INIT_TRAINER_ID=1 TRAINING_ROLE=TRAINER python test_fit_a_line.py
PADDLE_PSERVER_PORT=6174 PADDLE_PSERVER_IPS=192.168.1.3 PADDLE_TRAINERS=2 PADDLE_CURRENT_IP=192.168.1.3 PADDLE_TRAINER_ID=1 PADDLE_TRAINING_ROLE=TRAINER python test_fit_a_line.py
```
Since we defined two trainers, another Trainer must be started on a second compute node.
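The two commands differ only in their environment, so here is a hedged sketch of launching the same script programmatically, assuming test_fit_a_line.py reads the renamed variables with os.getenv:

```python
import os
import subprocess

# Values copied from the Parameter Server command above; adjust the IPs
# and PADDLE_TRAINING_ROLE (PSERVER or TRAINER) for each node.
env = dict(os.environ,
           PADDLE_PSERVER_PORT="6174",
           PADDLE_PSERVER_IPS="192.168.1.2",
           PADDLE_TRAINERS="2",
           PADDLE_CURRENT_IP="192.168.1.2",
           PADDLE_TRAINER_ID="1",
           PADDLE_TRAINING_ROLE="PSERVER")
subprocess.check_call(["python", "test_fit_a_line.py"], env=env)
```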

@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
ret_list.append(f)
return ret_list
trainers = int(os.getenv("TRAINERS"))
trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
trainers = int(os.getenv("PADDLE_TRAINERS"))
trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
data_file = fluid.layers.io.open_files(
filenames=gen_train_list("./mnist-[0-9]*.recordio", 2, 0),
thread_num=1,
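Only the tail of gen_train_list appears in this hunk; the following is a hedged reconstruction of the sharding it implies, assuming files are assigned round-robin so each trainer reads a disjoint subset:

```python
import glob

def gen_train_list_sketch(file_pattern, trainers, trainer_id):
    # Hypothetical reconstruction: assign every trainers-th matching file
    # to this trainer so shards do not overlap across workers.
    ret_list = []
    for idx, f in enumerate(sorted(glob.glob(file_pattern))):
        if idx % trainers == trainer_id:
            ret_list.append(f)
    return ret_list

# Trainer 0 of 2 would get mnist-0, mnist-2, ... recordio shards.
print(gen_train_list_sketch("./mnist-[0-9]*.recordio", 2, 0))
```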

@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
STanh Activation Operator.
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$out = b * \\frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC");
}

@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
Detection mAP evaluate operator.
The general steps are as follows. First, calculate the true positive and
false positive according to the input of detection and labels, then
calculate the mAP evaluation value. Both the '11 point' and 'integral'
mAP algorithms are supported. Please get more information from the
following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
)DOC");
}
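As a concrete reference for the '11 point' algorithm named in the comment, here is a small numpy sketch of interpolated average precision; the operator's actual C++ implementation is not shown in this diff:

```python
import numpy as np

def eleven_point_ap(precision, recall):
    # Average the best precision at recall >= t for t = 0.0, 0.1, ..., 1.0.
    precision, recall = np.asarray(precision), np.asarray(recall)
    ap = 0.0
    for t in np.linspace(0.0, 1.0, 11):
        mask = recall >= t
        ap += (precision[mask].max() if mask.any() else 0.0) / 11.0
    return ap

print(eleven_point_ap(precision=[1.0, 0.5, 0.67], recall=[0.33, 0.33, 0.67]))
```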

@ -0,0 +1,55 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <random>
#include <string>
#include "paddle/fluid/operators/mean_op.h"
namespace paddle {
namespace operators {
using framework::DataLayout;
template <typename T>
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
float mean = context.Attr<float>("mean");
float std = context.Attr<float>("std");
auto* tensor = context.Output<framework::Tensor>("Out");
T* data = tensor->mutable_data<T>(context.GetPlace());
unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
std::minstd_rand engine;
if (seed == 0) {
seed = std::random_device()();
}
engine.seed(seed);
std::normal_distribution<T> dist(mean, std);
int64_t size = tensor->numel();
for (int64_t i = 0; i < size; ++i) {
data[i] = dist(engine);
}
// The output format is set to MKLDNN's format.
// TODO(@mozga-intel): The matrix format should be set inside the other layers.
tensor->set_layout(DataLayout::kMKLDNN);
tensor->set_format(mkldnn::memory::format::oihw);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_KERNEL(gaussian_random, MKLDNN, ::paddle::platform::CPUPlace,
ops::GaussianMKLDNNKernel<float>);
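A hedged usage sketch for the new kernel, built with the append_op pattern this diff uses elsewhere; the attribute name use_mkldnn comes from the op-maker hunk below, and whether MKLDNN is actually picked still depends on CanMKLDNNBeUsed at runtime:

```python
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper

main = fluid.Program()
with fluid.program_guard(main):
    helper = LayerHelper("gaussian_random")
    out = helper.create_tmp_variable(dtype="float32")
    # Request the MKLDNN kernel via the new use_mkldnn attribute.
    helper.append_op(
        type="gaussian_random",
        outputs={"Out": out},
        attrs={"shape": [2, 3], "mean": 0.0, "std": 1.0,
               "seed": 0, "use_mkldnn": True})
```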

@ -15,6 +15,10 @@ limitations under the License. */
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
ctx.device_context());
ctx.device_context(), layout, library);
}
};
@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
"(int, default 5(FP32)) "
"Output data type.")
.SetDefault(framework::proto::VarType::FP32);
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
GaussianRandom Operator.

@ -22,43 +22,24 @@ namespace paddle {
namespace operators {
namespace math {
template <typename T>
__device__ T upper_bound(const T* first, T count, T val) {
const T* orig = first;
const T* it = nullptr;
T step = 0;
while (count > 0) {
it = first;
step = count / 2;
it += step;
if (!(val < *it)) {
first = ++it;
count -= step + 1;
} else {
count = step;
}
}
return first - orig;
}
template <typename T>
__global__ void KernelConcat(T** inputs, const int* input_cols, int col_size,
const int output_rows, const int output_cols,
T* output) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(input_cols, col_size, tid_x) - 1;
int curr_offset = input_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = input_cols[0];
for (; tid_x < output_cols; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = input_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = input_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = input_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
int segment_width = curr_col_offset - curr_offset;
T* input_ptr = inputs[curr_segment];
int tid_y = blockIdx.y * blockDim.y + threadIdx.y;
for (; tid_y < output_rows; tid_y += blockDim.y * gridDim.y)
@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
const int in_col, const int* out_cols,
int out_cols_size, T** outputs_data) {
int tid_x = blockIdx.x * blockDim.x + threadIdx.x;
int segment = upper_bound<int>(out_cols, out_cols_size, tid_x) - 1;
int curr_offset = out_cols[segment];
int curr_segment = segment;
int curr_segment = 0;
int curr_offset = out_cols[0];
for (; tid_x < in_col; tid_x += blockDim.x * gridDim.x) {
T curr_col_offset;
while ((curr_col_offset = out_cols[curr_segment + 1]) <= tid_x) {
int curr_col_offset = out_cols[curr_segment + 1];
while (curr_col_offset <= tid_x) {
curr_offset = curr_col_offset;
++curr_segment;
curr_col_offset = out_cols[curr_segment + 1];
}
int local_col = tid_x - curr_offset;
@ -228,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
outputs_cols[0] = 0;
for (int i = 0; i < o_num; ++i) {
int t_col = outputs->at(i)->numel() / out_row;
int t_col = ref_inputs.at(i)->numel() / out_row;
if (sameShape) {
if (t_col != out0_col) sameShape = false;
}
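The rewrite above drops the per-thread binary search (upper_bound) and instead walks the segment boundaries forward, which is safe because tid_x only increases inside the grid-stride loop. A Python model of the new mapping from an output column to (input index, local column):

```python
def map_columns_to_segments(input_cols, tids):
    # input_cols is a prefix sum of per-input column counts,
    # e.g. [0, 3, 5, 9] for three inputs of widths 3, 2 and 4.
    # tids must be visited in increasing order, as in the CUDA loop.
    curr_segment, curr_offset = 0, input_cols[0]
    mapping = []
    for tid_x in tids:
        curr_col_offset = input_cols[curr_segment + 1]
        while curr_col_offset <= tid_x:
            curr_offset = curr_col_offset
            curr_segment += 1
            curr_col_offset = input_cols[curr_segment + 1]
        mapping.append((curr_segment, tid_x - curr_offset))
    return mapping

print(map_columns_to_segments([0, 3, 5, 9], range(9)))
# [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2), (2, 3)]
```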

@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{});
framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]);

@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, place);
cur_scope.Rename(new_inside_name, inside_grad_name);

@ -0,0 +1,240 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace paddle {
namespace operators {
using paddle::framework::Tensor;
using paddle::platform::MKLDNNDeviceContext;
using paddle::platform::CPUDeviceContext;
using framework::DataLayout;
using mkldnn::memory;
using mkldnn::primitive;
using mkldnn::stream;
using mkldnn::sum;
using mkldnn::reorder;
using platform::to_void_cast;
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
"It must use CPUPlace.");
auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
const auto& mkldnn_engine = dev_ctx.GetEngine();
auto in_vars = ctx.MultiInputVar("X");
const int N = in_vars.size();
auto out_var = ctx.OutputVar("Out");
bool in_place = out_var == in_vars[0];
if (out_var->IsType<framework::LoDTensor>()) {
LoDTensor* output = ctx.Output<LoDTensor>("Out");
T* output_data = output->mutable_data<T>(ctx.GetPlace());
std::vector<int> dst_tz = framework::vectorize2int(output->dims());
auto src_tz = dst_tz;
memory::format output_format{memory::format::format_undef};
std::vector<float> scales;
std::vector<memory::primitive_desc> srcs_mpd;
std::vector<mkldnn::memory> srcs_mem;
PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
"Input[0] must be LoDTensors");
auto& input0 = in_vars[0]->Get<LoDTensor>();
PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
input0.format() != memory::format::format_undef,
"Wrong layout/format for inputs[0]");
memory::format input_format = input0.format();
if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::x;
}
if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
input_format == memory::format::nhwc)) {
input_format = memory::format::nc;
}
for (int i = in_place ? 1 : 0; i < N; i++) {
PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
"all inputs must be all LoDTensors");
auto& input = in_vars[i]->Get<LoDTensor>();
PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
input.format() != memory::format::format_undef,
"Wrong layout/format for inputs");
if (input.numel() == 0) {
continue;
}
const T* input_data = input.data<T>();
auto src_md =
memory::desc(src_tz, memory::data_type::f32, input_format);
auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
auto src_mem = memory(src_mpd, to_void_cast(input_data));
srcs_mpd.push_back(src_mpd);
srcs_mem.push_back(src_mem);
scales.push_back(1.0);
}
auto dst_md =
memory::desc(dst_tz, memory::data_type::f32, memory::format::any);
auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);
std::shared_ptr<memory> dst_mem;
if (in_place) {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
} else {
dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
}
std::vector<mkldnn::primitive::at> inputs;
for (size_t i = 0; i < srcs_mem.size(); ++i) {
inputs.push_back(srcs_mem[i]);
}
auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);
primitive reorder_prim;
std::shared_ptr<memory> target_mem;
if (in_place) {
output_format = input_format;
target_mem.reset(new memory(
{{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
output_data));
reorder_prim = reorder(*dst_mem, *target_mem);
}
std::vector<primitive> pipeline;
pipeline.push_back(sum_prim);
if (in_place) pipeline.push_back(reorder_prim);
stream(stream::kind::eager).submit(pipeline).wait();
output->set_layout(DataLayout::kMKLDNN);
output->set_format(output_format);
} else if (out_var->IsType<framework::SelectedRows>()) {
// TODO(@mozga-intel) Add MKLDNN SelectedRows support
std::unique_ptr<framework::SelectedRows> in0;
if (in_place) {
// If in_place, store input[0] in in0
auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
auto& rows = in_sel0.rows();
in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
in0->mutable_value()->ShareDataWith(in_sel0.value());
}
auto get_selected_row = [&](size_t i) -> const SelectedRows& {
if (i == 0 && in0) {
return *in0.get();
} else {
return in_vars[i]->Get<SelectedRows>();
}
};
auto* out = ctx.Output<SelectedRows>("Out");
out->mutable_rows()->clear();
auto* out_value = out->mutable_value();
// Runtime InferShape
size_t first_dim = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
first_dim += sel_row.rows().size();
}
auto in_dim =
framework::vectorize(get_selected_row(N - 1).value().dims());
in_dim[0] = static_cast<int64_t>(first_dim);
out_value->Resize(framework::make_ddim(in_dim));
// if all the input sparse vars are empty, no need to
// merge these vars.
if (first_dim == 0UL) {
return;
}
out_value->mutable_data<T>(ctx.GetPlace());
math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
int64_t offset = 0;
for (int i = 0; i < N; i++) {
auto& sel_row = get_selected_row(i);
if (sel_row.rows().size() == 0) {
continue;
}
PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
offset, out);
offset += sel_row.value().numel();
}
} else if (out_var->IsType<framework::LoDTensorArray>()) {
// TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
"Only support all inputs are TensorArray");
auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();
for (size_t i = 0; i < in_array.size(); ++i) {
if (in_array[i].numel() != 0) {
if (i >= out_array.size()) {
out_array.resize(i + 1);
}
if (out_array[i].numel() == 0) {
framework::TensorCopy(in_array[i], in_array[i].place(),
ctx.device_context(), &out_array[i]);
out_array[i].set_lod(in_array[i].lod());
} else {
PADDLE_ENFORCE(out_array[i].lod() == in_array[i].lod());
auto in = EigenVector<T>::Flatten(in_array[i]);
auto result = EigenVector<T>::Flatten(out_array[i]);
result.device(*ctx.template device_context<MKLDNNDeviceContext>()
.eigen_device()) = result + in;
}
}
}
}
} else {
PADDLE_THROW("Unexpected branch, output variable type is %s",
out_var->Type().name());
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OP_KERNEL(sum, MKLDNN, ::paddle::platform::CPUPlace,
paddle::operators::SumMKLDNNOpKernel<float>);
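A hedged end-to-end check for the sum path, using the fluid Python API that appears elsewhere in this diff. Note that the sums() wrapper below in this same change pins use_mkldnn=False, so exercising the MKLDNN kernel would additionally require flipping that attribute:

```python
import numpy as np
import paddle.fluid as fluid

place = fluid.CPUPlace()  # the MKLDNN kernel enforces CPUPlace
x = fluid.layers.data(name='x', shape=[4, 4], append_batch_size=False,
                      dtype='float32')
y = fluid.layers.data(name='y', shape=[4, 4], append_batch_size=False,
                      dtype='float32')
out = fluid.layers.sums(input=[x, y])

exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
a = np.random.rand(4, 4).astype('float32')
b = np.random.rand(4, 4).astype('float32')
res, = exe.run(feed={'x': a, 'y': b}, fetch_list=[out])
assert np.allclose(res, a + b)
```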

@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace paddle {
namespace operators {
using framework::Tensor;
@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto x_vars = ctx.MultiInputVar("X");
framework::LibraryType library{framework::LibraryType::kPlain};
framework::DataLayout layout{framework::DataLayout::kAnyLayout};
#ifdef PADDLE_WITH_MKLDNN
if (library == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library = framework::LibraryType::kMKLDNN;
layout = framework::DataLayout::kMKLDNN;
}
#endif
if (x_vars[0]->IsType<framework::LoDTensor>()) {
int dtype = -1;
for (auto& x_var : x_vars) {
@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor");
return framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(dtype),
ctx.device_context());
static_cast<framework::proto::VarType::Type>(dtype), ctx.GetPlace(),
layout, library);
} else if (x_vars[0]->IsType<framework::SelectedRows>()) {
for (auto& var : x_vars) {
auto& value = var->Get<framework::SelectedRows>().value();
if (value.IsInitialized()) {
return framework::OpKernelType(framework::ToDataType(value.type()),
ctx.device_context());
ctx.device_context(), layout, library);
}
}
// if the input sparse vars are not initialized, use a default kernel type.
return framework::OpKernelType(framework::proto::VarType::FP32,
ctx.device_context());
ctx.device_context(), layout, library);
} else if (x_vars[0]->IsType<framework::LoDTensorArray>()) {
for (auto& x_var : x_vars) {
auto& array = x_var->Get<framework::LoDTensorArray>();
for (auto& each : array) {
if (each.numel() != 0) {
return framework::OpKernelType(framework::ToDataType(each.type()),
ctx.device_context());
ctx.device_context(), layout,
library);
}
}
}
@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(vector<Tensor>) The input tensors of sum operator.")
.AsDuplicable();
AddOutput("Out", "(Tensor) The output tensor of sum operator.").Reuse("X");
AddAttr<bool>("use_mkldnn",
"(bool, default false) Only used in mkldnn kernel")
.SetDefault(false);
AddComment(R"DOC(
Sum operator.
@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework::BlockDesc* block) const override {
auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) {
VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType();
@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(sum, ops::SumOp, ops::SumOpMaker, ops::SumGradMaker,
ops::SumOpVarTypeInference);
REGISTER_OP_CPU_KERNEL(
sum, ops::SumKernel<paddle::platform::CPUDeviceContext, float>,
ops::SumKernel<paddle::platform::CPUDeviceContext, double>,

@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->set_lod(inside_tensor.lod());
}
}
auto new_inside_name = cur_scope.Rename(inside_grad_name);
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {pg_names[param_id], new_inside_name}}},
{{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
{{"Out", {pg_names[param_id]}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
sum_op->Run(cur_scope, dev_place);
cur_scope.Rename(new_inside_name, inside_grad_name);
}

@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory.get_primitive_desc().desc().data.format);
}
inline mkldnn::memory::format GetMKLDNNFormat(
const mkldnn::sum::primitive_desc& memory) {
return static_cast<mkldnn::memory::format>(
memory.dst_primitive_desc().desc().data.format);
}
} // namespace platform
} // namespace paddle

@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for idx, op_desc in enumerate(op_descs):
for var_name in op_desc.input_arg_names():
if len(renamed_vars[var_name]) > 1:
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": renamed_vars[var_name]},
{"Out": [var_name]}, {}), idx))
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": renamed_vars[var_name]}, {"Out": [var_name]},
{"use_mkldnn": False}), idx))
renamed_vars[var_name] = [var_name]
for var_name in op_desc.output_arg_names():
if var_name == core.empty_var_name(
@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
renamed_vars[var_name].append(new_name)
for var_name, inputs in renamed_vars.iteritems():
if len(inputs) > 1:
pending_sum_ops.append((_create_op_desc_(
"sum", {"X": inputs}, {"Out": [var_name]}, {}), len(op_descs)))
pending_sum_ops.append(
(_create_op_desc_("sum", {"X": inputs}, {"Out": [var_name]},
{"use_mkldnn": False}), len(op_descs)))
# sum_op descs are sorted according to their insert position
for p in reversed(pending_sum_ops):
op_descs.insert(p[1], p[0])
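Why backward needs these generated sum ops at all: when a forward variable feeds several consumers, each produces its own partial gradient, and the chain rule adds them. A tiny numpy illustration of the accumulation rule these sum ops implement:

```python
import numpy as np

# z = y * a + y * b uses y twice, so dz/dy gets one term per use.
y = np.array([1.0, 2.0])
a, b = 3.0, 4.0
grad_first_use = a * np.ones_like(y)    # d(y * a) / dy
grad_second_use = b * np.ones_like(y)   # d(y * b) / dy
grad_y = grad_first_use + grad_second_use  # what the inserted sum op computes
assert np.allclose(grad_y, (a + b) * np.ones_like(y))
```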

@ -24,8 +24,6 @@ __all__ = [
'GradientClipByValue',
'GradientClipByNorm',
'GradientClipByGlobalNorm',
'append_gradient_clip_ops',
'error_clip_callback',
]
@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
class ErrorClipByValue(BaseErrorClipAttr):
"""
Clips tensor values to the range [min, max].
Given a tensor t, this operation clips its values to the range [min, max] in place.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. If not set by the user, \
it will be set to -max by the framework.
Examples:
.. code-block:: python
var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
class GradientClipByValue(BaseGradientClipAttr):
"""
Clips gradient values to the range [min, max].
Given a tensor t, this operation clips its values to the range [min, max] in place.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. If not set by the user, \
it will be set to -max by the framework.
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(-1.0, 1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, max, min=None):
max = float(max)
if min is None:
@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
class GradientClipByNorm(BaseGradientClipAttr):
"""
Clips tensor values to a maximum L2-norm.
This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
:math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
:math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
.. math::
Out = \\frac{max\_norm * X}{norm(X)},
where :math:`norm(X)` represents the L2 norm of :math:`X`.
Args:
clip_norm (float): The maximum norm value
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def __init__(self, clip_norm):
self.clip_norm = clip_norm
@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr):
class GradientClipByGlobalNorm(BaseGradientClipAttr):
"""
Clips values of multiple tensors by the ratio of the sum of their norms.
Given a list of tensors t_list, and a clipping ratio clip_norm, this
operation returns a list of clipped tensors list_clipped and the global
norm (global_norm) of all tensors in t_list.
To perform the clipping, the values :math:`t\_list[i]` are set to:
.. math::
t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
where:
.. math::
global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
otherwise they're all shrunk by the global ratio.
Args:
clip_norm (float): The maximum norm value
group_name (str, optional): The group name for this clip.
Examples:
.. code-block:: python
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
with fluid.program_guard(main_program=prog_clip):
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
"""
def __init__(self, clip_norm, group_name="default_group"):
if not isinstance(group_name, basestring):
raise TypeError("'group_name' must be a basestring.")
@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
def set_gradient_clip(clip, param_list=None, program=None):
"""
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list, None by default): Parameters that require gradient clip.
It can be a list of parameter or a list of parameter's name.
When it's None, all parameters in the program will be included.
program(Program, None by default): The program where parameters are.
Will be the default main program when assigned with None.
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
which describes the type and detailed attributes of required gradient clip.
param_list(list(Variable)): Parameters that require gradient clip.
It can be a list of parameters or a list of parameter names.
When it's None, all parameters in the program will be included.
program(Program): The program where parameters are.
Will be the default main program when assigned with None.
"""
if not isinstance(clip, BaseGradientClipAttr):
raise TypeError(
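The rewritten set_gradient_clip docstring above still has no example, so here is a hedged usage sketch combining it with the GradientClipByGlobalNorm snippet earlier in this diff; whether minimize applies the clip automatically follows the framework's behavior at this revision:

```python
import paddle.fluid as fluid

with fluid.program_guard(fluid.Program(), fluid.Program()):
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(y_pred, y))
    # Clip all parameter gradients by their global L2 norm before updating.
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
```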

@ -27,13 +27,30 @@ __all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): if True, use parallel_executor to run the inference; it will use multiple CPUs/GPUs.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
"""
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
:param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
"""
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
@ -60,9 +77,20 @@ class Inferencer(object):
def infer(self, inputs, return_numpy=True):
"""
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program
to get the predict value
:return: the predict value of the inference model
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be fed into the inference program
return_numpy (bool): whether to transform the return value into numpy
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(

File diff suppressed because it is too large.

@ -185,12 +185,14 @@ def Print(input,
Returns:
Variable: Output tensor, same data as the input tensor.
Examples:
.. code-block:: python
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
'''
helper = LayerHelper('print', **locals())
out = helper.create_tmp_variable(dtype=helper.input_dtype())
@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
class ConditionalBlock(object):
'''
**ConditionalBlock**
ConditionalBlock is an operator that binds a block to a specific condition;
if the condition matches, the corresponding block will be executed.
Args:
inputs (Variable): bool conditions.
is_scalar_condition (bool): whether the branch is controlled by a scalar.
name(str): name of this ConditionalBlock.
Examples:
.. code-block:: python
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_cond = layers.ConditionalBlock([true_image])
with true_cond.block():
...
with false_cond.block():
...
'''
def __init__(self, inputs, is_scalar_condition=False, name=None):
for each_input in inputs:
if not isinstance(each_input, Variable):

@ -16,7 +16,7 @@ All layers just related to the detection neural network.
"""
from layer_function_generator import generate_layer_fn
from layer_function_generator import autodoc
from layer_function_generator import autodoc, templatedoc
from ..layer_helper import LayerHelper
import tensor
import nn
@ -155,7 +155,7 @@ def detection_output(loc,
return nmsed_outs
@autodoc()
@templatedoc()
def detection_map(detect_res,
label,
class_num,
@ -166,6 +166,47 @@ def detection_map(detect_res,
input_states=None,
out_states=None,
ap_version='integral'):
"""
${comment}
Args:
detect_res: ${detect_res_comment}
label: ${label_comment}
class_num: ${class_num_comment}
background_label: ${background_label_comment}
overlap_threshold: ${overlap_threshold_comment}
evaluate_difficult: ${evaluate_difficult_comment}
has_state: ${has_state_comment}
input_states: If not None, it contains 3 elements:
1. pos_count ${pos_count_comment}.
2. true_pos ${true_pos_comment}.
3. false_pos ${false_pos_comment}.
out_states: If not None, it contains 3 elements.
1. accum_pos_count ${accum_pos_count_comment}.
2. accum_true_pos ${accum_true_pos_comment}.
3. accum_false_pos ${accum_false_pos_comment}.
ap_version: ${ap_type_comment}
Returns:
${map_comment}
Examples:
.. code-block:: python
detect_res = fluid.layers.data(
name='detect_res',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
label = fluid.layers.data(
name='label',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
map_out = fluid.layers.detection_map(detect_res, label, 21)
"""
helper = LayerHelper("detection_map", **locals())
def __create_var(type):

File diff suppressed because it is too large.

@ -230,7 +230,11 @@ def sums(input, out=None):
helper = LayerHelper('sum', **locals())
if out is None:
out = helper.create_tmp_variable(dtype=helper.input_dtype())
helper.append_op(type='sum', inputs={'X': input}, outputs={'Out': out})
helper.append_op(
type='sum',
inputs={'X': input},
outputs={'Out': out},
attrs={'use_mkldnn': False})
return out
@ -380,7 +384,7 @@ def argmin(x, axis=0):
"""
**argmin**
This function computes the indices of the min elements
of the input tensor's element along the provided axis.
Args:
@ -395,7 +399,7 @@ def argmin(x, axis=0):
.. code-block:: python
out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1)
"""
helper = LayerHelper("arg_min", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64)
@ -411,7 +415,7 @@ def argmax(x, axis=0):
"""
**argmax**
This function computes the indices of the max elements
of the input tensor's element along the provided axis.
Args:
@ -426,7 +430,7 @@ def argmax(x, axis=0):
.. code-block:: python
out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1)
"""
helper = LayerHelper("arg_max", **locals())
out = helper.create_tmp_variable(VarDesc.VarType.INT64)
@ -495,9 +499,9 @@ def reverse(x, axis):
Args:
x(Variable): the input to be reversed.
axis(int|tuple|list): Axis along which the order of elements
is reversed. If it is a tuple or a list, reversing
will be applied on each axis in the tuple or list.
Returns:
Variable: The reversed tensor.
@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True):
Args:
x(variable): The Tensor/LoDTensor to be saved.
file_path(str): The file path where the variable will be saved.
overwrite(bool): Whether or not to overwrite the given file if it already
exists. If set to 'False' and the file exists, a runtime
error will be thrown.
"""
helper = LayerHelper("save", **locals())
helper.append_op(
@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True):
a single file.
file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not to overwrite the given file if it already
exists. If set to 'False' and the file exists, a runtime
error will be thrown.
Returns:
There is no return value.
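A hedged usage sketch for the two save variants documented above, assuming both are exported from fluid.layers as this diff suggests:

```python
import paddle.fluid as fluid

# Hypothetical variables; these calls append save ops to the default
# program, and running it with an Executor performs the actual writes.
w = fluid.layers.create_parameter(shape=[4, 4], dtype='float32', name='w')
b = fluid.layers.create_parameter(shape=[4], dtype='float32', name='b')
fluid.layers.save(w, file_path="/tmp/w.bin", overwrite=True)
fluid.layers.save_combine([w, b], file_path="/tmp/params.bin")
```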

File diff suppressed because it is too large.

Some files were not shown because too many files have changed in this diff.
