Merge branch 'develop' of upstream into fix_cross_entropy_doc

del_some_in_makelist
Yibing Liu 7 years ago
commit 3db7c82955

@ -79,7 +79,7 @@ class Optimizer(object):
def minimize(self, loss, parameter_list): def minimize(self, loss, parameter_list):
"""Add operations to minimize `loss` by updating `parameter_list`. """Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `append_backward_ops()` and This method combines interface `append_backward()` and
`create_optimization_pass()` into one. `create_optimization_pass()` into one.
""" """
params_grads = self.create_backward_pass(loss, parameter_list) params_grads = self.create_backward_pass(loss, parameter_list)

@ -37,7 +37,7 @@ cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry init)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
py_proto_compile(framework_py_proto SRCS framework.proto) py_proto_compile(framework_py_proto SRCS framework.proto)
# Generate an empty __init__.py to make framework_py_proto as a valid python module. # Generate an empty __init__.py to make framework_py_proto as a valid python module.

@ -20,7 +20,11 @@ namespace framework {
// For more details about the design of LibraryType, Please refer to // For more details about the design of LibraryType, Please refer to
// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library // https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md#library
enum class LibraryType { kPlain = 0, kMKLDNN = 1, kCUDNN = 2 }; enum class LibraryType {
kPlain = 0,
kMKLDNN = 1,
kCUDNN = 2,
};
inline std::string LibraryTypeToString(const LibraryType& library_type) { inline std::string LibraryTypeToString(const LibraryType& library_type) {
switch (library_type) { switch (library_type) {
@ -31,7 +35,26 @@ inline std::string LibraryTypeToString(const LibraryType& library_type) {
case LibraryType::kCUDNN: case LibraryType::kCUDNN:
return "CUDNN"; return "CUDNN";
default: default:
PADDLE_THROW("unknown LibraryType %d", library_type); PADDLE_THROW("unknown LibraryType %d", static_cast<int>(library_type));
}
}
inline LibraryType StringToLibraryType(const char* ctype) {
std::string s(ctype);
if (s == std::string("PLAIN")) {
return LibraryType::kPlain;
} else if (s == std::string("MKLDNN")) {
return LibraryType::kMKLDNN;
} else if (s == std::string("CUDNN")) {
return LibraryType::kCUDNN;
// To be compatible with register macro.
// CPU, CUDA, PLAIN are same library type.
} else if (s == std::string("CPU")) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) {
return LibraryType::kPlain;
} else {
PADDLE_THROW("Unknown LibraryType %s", s.c_str());
} }
} }

@ -88,6 +88,14 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
need_update_ = true; need_update_ = true;
} }
void OpDesc::CopyFrom(const OpDesc &op_desc) {
desc_.set_type(op_desc.Type());
inputs_ = op_desc.inputs_;
outputs_ = op_desc.outputs_;
attrs_ = op_desc.attrs_;
need_update_ = true;
}
OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog) OpDesc::OpDesc(const proto::OpDesc &desc, ProgramDesc *prog)
: desc_(desc), need_update_(false) { : desc_(desc), need_update_(false) {
// restore inputs_ // restore inputs_

@ -35,6 +35,8 @@ class OpDesc {
OpDesc(const proto::OpDesc &desc, ProgramDesc *prog); OpDesc(const proto::OpDesc &desc, ProgramDesc *prog);
void CopyFrom(const OpDesc &op_desc);
proto::OpDesc *Proto(); proto::OpDesc *Proto();
std::string Type() const { return desc_.type(); } std::string Type() const { return desc_.type(); }

@ -79,30 +79,31 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelTypes...> {
using KERNEL_TYPE = using KERNEL_TYPE =
typename std::tuple_element<I, std::tuple<KernelTypes...>>::type; typename std::tuple_element<I, std::tuple<KernelTypes...>>::type;
void operator()(const char* op_type) const { void operator()(const char* op_type, const char* library_type) const {
using T = typename KERNEL_TYPE::ELEMENT_TYPE; using T = typename KERNEL_TYPE::ELEMENT_TYPE;
OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType()); OpKernelType key(ToDataType(std::type_index(typeid(T))), PlaceType(),
DataLayout::kAnyLayout, StringToLibraryType(library_type));
OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE); OperatorWithKernel::AllOpKernels()[op_type][key].reset(new KERNEL_TYPE);
constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value; constexpr auto size = std::tuple_size<std::tuple<KernelTypes...>>::value;
OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...> OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelTypes...>
func; func;
func(op_type); func(op_type, library_type);
} }
}; };
template <typename PlaceType, size_t I, typename... KernelType> template <typename PlaceType, size_t I, typename... KernelType>
struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> { struct OpKernelRegistrarFunctor<PlaceType, true, I, KernelType...> {
void operator()(const char* op_type) const {} void operator()(const char* op_type, const char* library_type) const {}
}; };
// User can register many kernel in one place. The data type could be different. // User can register many kernel in one place. The data type could be different.
template <typename PlaceType, typename... KernelType> template <typename PlaceType, typename... KernelType>
class OpKernelRegistrar : public Registrar { class OpKernelRegistrar : public Registrar {
public: public:
explicit OpKernelRegistrar(const char* op_type) { explicit OpKernelRegistrar(const char* op_type, const char* library_type) {
OpKernelRegistrarFunctor<PlaceType, false, 0, KernelType...> func; OpKernelRegistrarFunctor<PlaceType, false, 0, KernelType...> func;
func(op_type); func(op_type, library_type);
} }
}; };
@ -181,7 +182,8 @@ class OpKernelRegistrar : public Registrar {
__reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \ __reg_op_kernel_##op_type##_##DEVICE_TYPE##__, \
"REGISTER_OP_KERNEL must be called in global namespace"); \ "REGISTER_OP_KERNEL must be called in global namespace"); \
static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \ static ::paddle::framework::OpKernelRegistrar<place_class, __VA_ARGS__> \
__op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type); \ __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__(#op_type, \
#DEVICE_TYPE); \
int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \ int TouchOpKernelRegistrar_##op_type##_##DEVICE_TYPE() { \
__op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \ __op_kernel_registrar_##op_type##_##DEVICE_TYPE##__.Touch(); \
return 0; \ return 0; \

@ -1,3 +1,17 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include <gtest/gtest.h> #include <gtest/gtest.h>
@ -182,3 +196,71 @@ TEST(OperatorRegistrar, Test) {
using namespace paddle::framework; using namespace paddle::framework;
OperatorRegistrar<CosineOpComplete, CosineOpProtoAndCheckerMaker> reg("cos"); OperatorRegistrar<CosineOpComplete, CosineOpProtoAndCheckerMaker> reg("cos");
} }
namespace paddle {
namespace framework {
class OpKernelTestMaker : public OpProtoAndCheckerMaker {
public:
OpKernelTestMaker(OpProto* proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddComment("NoGradOp, same input output. no Grad");
}
};
class OpWithKernelTest : public OperatorWithKernel {
public:
using OperatorWithKernel::OperatorWithKernel;
protected:
void InferShape(InferShapeContext* ctx) const override {}
framework::OpKernelType GetActualKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(proto::DataType::FP32, ctx.device_context());
}
};
template <typename DeviceContext, typename T>
class OpKernelTest : public paddle::framework::OpKernel<T> {
public:
void Compute(const paddle::framework::ExecutionContext& ctx) const {}
};
} // namespace framework
} // namespace paddle
REGISTER_OP_WITHOUT_GRADIENT(op_with_kernel,
paddle::framework::OpWithKernelTest,
paddle::framework::OpKernelTestMaker);
REGISTER_OP_CPU_KERNEL(
op_with_kernel,
paddle::framework::OpKernelTest<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(op_with_kernel,
paddle::framework::OpKernelTest<
paddle::platform::CUDADeviceContext, float>);
TEST(OperatorRegistrar, CPU) {
paddle::framework::proto::OpDesc op_desc;
paddle::platform::CPUPlace cpu_place;
paddle::framework::Scope scope;
op_desc.set_type("op_with_kernel");
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cpu_place);
}
#ifdef PADDLE_WITH_CUDA
TEST(OperatorRegistrar, CUDA) {
paddle::framework::proto::OpDesc op_desc;
paddle::platform::CUDAPlace cuda_place(0);
paddle::framework::Scope scope;
op_desc.set_type("op_with_kernel");
auto op = paddle::framework::OpRegistry::CreateOp(op_desc);
op->Run(scope, cuda_place);
}
#endif

@ -74,7 +74,7 @@ const proto::TensorDesc &VarDesc::tensor_desc() const {
case proto::VarDesc::LOD_TENSOR_ARRAY: case proto::VarDesc::LOD_TENSOR_ARRAY:
return desc_.tensor_array().tensor(); return desc_.tensor_array().tensor();
default: default:
PADDLE_THROW("Unexpected branch."); PADDLE_THROW("The type of var '", this->Name(), "' is unsupported.");
} }
} }

@ -315,6 +315,10 @@ class CudnnConvGradOpKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_KERNEL(conv2d, CUDNN, paddle::platform::CUDAPlace,
paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(conv2d_cudnn, REGISTER_OP_CUDA_KERNEL(conv2d_cudnn,
paddle::operators::CudnnConvOpKernel<float>, paddle::operators::CudnnConvOpKernel<float>,
paddle::operators::CudnnConvOpKernel<double>); paddle::operators::CudnnConvOpKernel<double>);

@ -302,8 +302,29 @@ void set_constant(const platform::DeviceContext& context,
#endif #endif
} }
template <typename T>
struct RowwiseAdd<platform::CPUDeviceContext, T> {
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& input,
const framework::Tensor& vector, framework::Tensor* output) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenVector<T>::Flatten(vector);
auto out = framework::EigenMatrix<T>::From(*output);
for (int64_t i = 0; i < in_dims[0]; ++i) {
out.chip(i, 0) = in.chip(i, 0) + vec;
}
}
};
template struct RowwiseAdd<platform::CPUDeviceContext, float>; template struct RowwiseAdd<platform::CPUDeviceContext, float>;
template struct RowwiseAdd<platform::CPUDeviceContext, double>; template struct RowwiseAdd<platform::CPUDeviceContext, double>;
template struct ColwiseSum<platform::CPUDeviceContext, float>; template struct ColwiseSum<platform::CPUDeviceContext, float>;
template struct ColwiseSum<platform::CPUDeviceContext, double>; template struct ColwiseSum<platform::CPUDeviceContext, double>;

@ -273,6 +273,35 @@ void set_constant_with_place<platform::CUDAPlace>(
TensorSetConstantGPU(context, tensor, value)); TensorSetConstantGPU(context, tensor, value));
} }
template <typename T>
__global__ void RowwiseAddKernel(const T* a, const T* b, T* c, int width,
int num) {
T tmp = 1.0 / width;
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < num;
i += blockDim.x * gridDim.x) {
int h = i * tmp;
int w = i - h * width;
c[i] = a[i] + b[w];
}
}
template <typename T>
struct RowwiseAdd<platform::CUDADeviceContext, T> {
void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& input,
const framework::Tensor& vector, framework::Tensor* output) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
int blocks = 512;
int grids = (input.numel() + blocks - 1) / blocks;
RowwiseAddKernel<T><<<grids, blocks, 0, context.stream()>>>(
input.data<T>(), vector.data<T>(), output->data<T>(),
static_cast<int>(in_dims[1]), static_cast<int>(input.numel()));
}
};
template struct RowwiseAdd<platform::CUDADeviceContext, float>; template struct RowwiseAdd<platform::CUDADeviceContext, float>;
template struct RowwiseAdd<platform::CUDADeviceContext, double>; template struct RowwiseAdd<platform::CUDADeviceContext, double>;
template struct ColwiseSum<platform::CUDADeviceContext, float>; template struct ColwiseSum<platform::CUDADeviceContext, float>;

@ -45,25 +45,6 @@ void Transpose<DeviceContext, T, Rank>::operator()(
eigen_out.device(*dev) = eigen_in.shuffle(permute); eigen_out.device(*dev) = eigen_in.shuffle(permute);
} }
template <typename DeviceContext, typename T>
void RowwiseAdd<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input,
const framework::Tensor& vector,
framework::Tensor* output) {
auto in_dims = input.dims();
auto size = input.numel() / in_dims[0];
PADDLE_ENFORCE_EQ(vector.numel(), size);
PADDLE_ENFORCE_EQ(output->dims(), in_dims);
auto in = framework::EigenMatrix<T>::From(input);
auto vec = framework::EigenMatrix<T>::From(vector);
auto out = framework::EigenMatrix<T>::From(*output);
Eigen::array<int, 2> shape({{1, static_cast<int>(size)}});
Eigen::array<int, 2> bcast({{static_cast<int>(in_dims[0]), 1}});
out.device(*context.eigen_device()) =
in + vec.reshape(shape).broadcast(bcast);
}
template <typename DeviceContext, typename T> template <typename DeviceContext, typename T>
void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context, void ColwiseSum<DeviceContext, T>::operator()(const DeviceContext& context,
const framework::Tensor& input, const framework::Tensor& input,

@ -171,12 +171,23 @@ void BindBlockDesc(py::module &m) {
std::string name = byte_name; std::string name = byte_name;
return self.HasVar(name); return self.HasVar(name);
}) })
.def("has_var_recursive",
[](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name;
return self.HasVarRecursive(name);
})
.def("find_var", .def("find_var",
[](BlockDesc &self, py::bytes byte_name) { [](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name; std::string name = byte_name;
return self.FindVar(name); return self.FindVar(name);
}, },
py::return_value_policy::reference) py::return_value_policy::reference)
.def("find_var_recursive",
[](BlockDesc &self, py::bytes byte_name) {
std::string name = byte_name;
return self.FindVarRecursive(name);
},
py::return_value_policy::reference)
.def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference) .def("all_vars", &BlockDesc::AllVars, py::return_value_policy::reference)
.def("op_size", &BlockDesc::OpSize) .def("op_size", &BlockDesc::OpSize)
.def("op", &BlockDesc::Op, py::return_value_policy::reference) .def("op", &BlockDesc::Op, py::return_value_policy::reference)
@ -204,7 +215,7 @@ void BindVarDsec(py::module &m) {
.def("set_shape", &VarDesc::SetShape) .def("set_shape", &VarDesc::SetShape)
.def("set_dtype", &VarDesc::SetDataType) .def("set_dtype", &VarDesc::SetDataType)
.def("shape", &VarDesc::Shape, py::return_value_policy::reference) .def("shape", &VarDesc::Shape, py::return_value_policy::reference)
.def("dtype", &VarDesc::GetDataType) .def("dtype", &VarDesc::GetDataType, py::return_value_policy::reference)
.def("lod_level", &VarDesc::GetLodLevel) .def("lod_level", &VarDesc::GetLodLevel)
.def("set_lod_level", &VarDesc::SetLoDLevel) .def("set_lod_level", &VarDesc::SetLoDLevel)
.def("type", &VarDesc::GetType) .def("type", &VarDesc::GetType)
@ -236,14 +247,22 @@ void BindOpDesc(py::module &m) {
.value("BLOCK", proto::AttrType::BLOCK); .value("BLOCK", proto::AttrType::BLOCK);
py::class_<OpDesc> op_desc(m, "OpDesc", ""); py::class_<OpDesc> op_desc(m, "OpDesc", "");
op_desc.def("type", &OpDesc::Type) op_desc
.def("__init__", [](OpDesc &self) { new (&self) OpDesc(); },
py::return_value_policy::reference)
.def("copy_from", &OpDesc::CopyFrom)
.def("type", &OpDesc::Type)
.def("set_type", &OpDesc::SetType) .def("set_type", &OpDesc::SetType)
.def("input", &OpDesc::Input) .def("input", &OpDesc::Input)
.def("input_names", &OpDesc::InputNames) .def("input_names", &OpDesc::InputNames)
.def("set_input", &OpDesc::SetInput)
.def("output", &OpDesc::Output) .def("output", &OpDesc::Output)
.def("output_names", &OpDesc::OutputNames) .def("output_names", &OpDesc::OutputNames)
.def("set_input", &OpDesc::SetInput)
.def("set_output", &OpDesc::SetOutput) .def("set_output", &OpDesc::SetOutput)
.def("input_arg_names", &OpDesc::InputArgumentNames)
.def("output_arg_names", &OpDesc::OutputArgumentNames)
.def("rename_input", &OpDesc::RenameInput)
.def("rename_output", &OpDesc::RenameOutput)
.def("has_attr", &OpDesc::HasAttr) .def("has_attr", &OpDesc::HasAttr)
.def("attr_type", &OpDesc::GetAttrType) .def("attr_type", &OpDesc::GetAttrType)
.def("attr_names", &OpDesc::AttrNames) .def("attr_names", &OpDesc::AttrNames)

@ -269,22 +269,21 @@ All parameter, weight, gradient are variables in Paddle.
} }
return ret_values; return ret_values;
}); });
m.def("get_grad_op_descs", m.def(
[](const OpDesc &op_desc, "get_grad_op_desc", [](const OpDesc &op_desc,
const std::unordered_set<std::string> &no_grad_set, const std::unordered_set<std::string> &no_grad_set,
std::unordered_map<std::string, std::string> &grad_to_var,
const std::vector<BlockDesc *> &grad_sub_block) { const std::vector<BlockDesc *> &grad_sub_block) {
std::unordered_map<std::string, std::string> grad_to_var;
std::vector<std::unique_ptr<OpDesc>> grad_op_descs = std::vector<std::unique_ptr<OpDesc>> grad_op_descs =
framework::OpInfoMap::Instance() framework::OpInfoMap::Instance()
.Get(op_desc.Type()) .Get(op_desc.Type())
.GradOpMaker()(op_desc, no_grad_set, &grad_to_var, .GradOpMaker()(op_desc, no_grad_set, &grad_to_var,
grad_sub_block); grad_sub_block);
std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size()); std::vector<OpDesc *> grad_op_desc_ptrs(grad_op_descs.size());
std::transform( std::transform(grad_op_descs.begin(), grad_op_descs.end(),
grad_op_descs.begin(), grad_op_descs.end(),
grad_op_desc_ptrs.begin(), grad_op_desc_ptrs.begin(),
[](std::unique_ptr<OpDesc> &p) { return p.release(); }); [](std::unique_ptr<OpDesc> &p) { return p.release(); });
return grad_op_desc_ptrs; return std::make_pair(grad_op_desc_ptrs, grad_to_var);
}); });
m.def("prune", [](const ProgramDesc &origin, m.def("prune", [](const ProgramDesc &origin,
const std::vector<std::array<size_t, 2>> &targets) { const std::vector<std::array<size_t, 2>> &targets) {
@ -301,6 +300,8 @@ All parameter, weight, gradient are variables in Paddle.
InferenceOptimize(*(origin.Proto()), &pruned_desc); InferenceOptimize(*(origin.Proto()), &pruned_desc);
return new ProgramDesc(pruned_desc); return new ProgramDesc(pruned_desc);
}); });
m.def("empty_var_name", []() { return framework::kEmptyVarName; });
m.def("grad_var_suffix", []() { return framework::kGradVarSuffix; });
m.def_submodule( m.def_submodule(
"var_names", "var_names",
"The module will return special predefined variable name in Paddle") "The module will return special predefined variable name in Paddle")

File diff suppressed because it is too large Load Diff

@ -95,7 +95,9 @@ class DistributeTranspiler:
""" """
if program is None: if program is None:
program = default_main_program() program = default_main_program()
self.program = program
self.trainers = trainers self.trainers = trainers
self.optimize_ops = optimize_ops
self._optimize_distributed( self._optimize_distributed(
optimize_ops, optimize_ops,
program, program,
@ -156,9 +158,10 @@ class DistributeTranspiler:
attrs={"endpoints": pserver_endpoints, attrs={"endpoints": pserver_endpoints,
"epmap": epmap}) "epmap": epmap})
def get_trainer_program(optimize_ops, program): def get_trainer_program(self):
# remove optimize ops and add a send op to main_program # remove optimize ops and add a send op to main_program
program.global_block().delete_ops(optimize_ops) self.program.global_block().delete_ops(self.optimize_ops)
return self.program
def _create_var_for_trainers(self, block, var, trainers): def _create_var_for_trainers(self, block, var, trainers):
var_list = [] var_list = []
@ -210,7 +213,6 @@ class DistributeTranspiler:
if opt_op.inputs.has_key("Grad"): if opt_op.inputs.has_key("Grad"):
if opt_op.inputs["Grad"].name in grad_var_names: if opt_op.inputs["Grad"].name in grad_var_names:
print "appending ", opt_op.type, opt_op.inputs
optimize_sub_program.global_block().append_op( optimize_sub_program.global_block().append_op(
type=opt_op.type, type=opt_op.type,
inputs=opt_op.inputs, inputs=opt_op.inputs,

@ -663,7 +663,7 @@ class Block(object):
end = list(self.ops).index(ops[-1]) end = list(self.ops).index(ops[-1])
except Exception, e: except Exception, e:
raise e raise e
self.desc.remove_op(start, end) self.desc.remove_op(start, end + 1)
def prepend_op(self, *args, **kwargs): def prepend_op(self, *args, **kwargs):
op_desc = self.desc.prepend_op() op_desc = self.desc.prepend_op()
@ -846,9 +846,11 @@ class Program(object):
self.sync_with_cpp() self.sync_with_cpp()
return param_to_grad_info return param_to_grad_info
def create_block(self): def create_block(self, parent_idx=None):
new_block_idx = len(self.blocks) new_block_idx = len(self.blocks)
self.desc.append_block(self.current_block().desc) parent = self.current_block() if parent_idx is None else self.block(
parent_idx)
self.desc.append_block(parent.desc)
self.current_block_idx = new_block_idx self.current_block_idx = new_block_idx
self.blocks.append(Block(self, self.current_block_idx)) self.blocks.append(Block(self, self.current_block_idx))
return self.current_block() return self.current_block()

@ -1,7 +1,7 @@
from collections import defaultdict from collections import defaultdict
import framework import framework
from backward import append_backward_ops from backward import append_backward
from framework import unique_name, program_guard from framework import unique_name, program_guard
from initializer import Constant from initializer import Constant
from layer_helper import LayerHelper from layer_helper import LayerHelper
@ -194,10 +194,10 @@ class Optimizer(object):
no_grad_set=None): no_grad_set=None):
"""Add operations to minimize `loss` by updating `parameter_list`. """Add operations to minimize `loss` by updating `parameter_list`.
This method combines interface `append_backward_ops()` and This method combines interface `append_backward()` and
`create_optimization_pass()` into one. `create_optimization_pass()` into one.
""" """
params_grads = append_backward_ops(loss, parameter_list, no_grad_set) params_grads = append_backward(loss, parameter_list, no_grad_set)
params_grads = append_gradient_clip_ops(params_grads) params_grads = append_gradient_clip_ops(params_grads)

@ -38,35 +38,43 @@ train_reader = paddle.batch(
place = fluid.CPUPlace() place = fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
t = fluid.DistributeTranspiler() t = fluid.DistributeTranspiler()
# all parameter server endpoints list for spliting parameters
pserver_endpoints = os.getenv("PSERVERS") pserver_endpoints = os.getenv("PSERVERS")
# server endpoint for current node
current_endpoint = os.getenv("SERVER_ENDPOINT")
# run as trainer or parameter server
training_role = os.getenv("TRAINING_ROLE", training_role = os.getenv("TRAINING_ROLE",
"TRAINER") # get the training role: trainer/pserver "TRAINER") # get the training role: trainer/pserver
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=1) t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)
if training_role == "PSERVER": if training_role == "PSERVER":
pserver_prog = t.get_pserver_program(pserver_endpoints, optimize_ops) if not current_endpoint:
print("need env SERVER_ENDPOINT")
exit(1)
pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
exe.run(pserver_prog) exe.run(pserver_prog)
elif training_role == "TRAINER": elif training_role == "TRAINER":
trainer_prog = t.get_trainer_program()
feeder = fluid.DataFeeder(feed_list=[images, label], place=place) feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
accuracy.reset(exe) accuracy.reset(exe)
batch_id = 0
for data in train_reader(): for data in train_reader():
loss, acc = exe.run(fluid.default_main_program(), loss, acc = exe.run(trainer_prog,
feed=feeder.feed(data), feed=feeder.feed(data),
fetch_list=[avg_cost] + accuracy.metrics) fetch_list=[avg_cost] + accuracy.metrics)
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
# print loss, acc if batch_id % 100 == 0:
if loss < 10.0 and pass_acc > 0.9: print("batch_id %d, loss: %f, acc: %f" %
# if avg cost less than 10.0 and accuracy is larger than 0.9, we think our code is good. (batch_id, loss, pass_acc))
exit(0) batch_id += 1
pass_acc = accuracy.eval(exe) pass_acc = accuracy.eval(exe)
print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc)) print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
else: else:
print("environment var TRAINER_ROLE should be TRAINER os PSERVER") print("environment var TRAINER_ROLE should be TRAINER os PSERVER")
exit(1)

@ -4,7 +4,7 @@ import random
import itertools import itertools
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import collections import collections
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.op import Operator from paddle.v2.fluid.op import Operator
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import Program, OpProtoHolder from paddle.v2.fluid.framework import Program, OpProtoHolder
@ -491,7 +491,7 @@ class OpTest(unittest.TestCase):
op_loss.desc.infer_var_type(block.desc) op_loss.desc.infer_var_type(block.desc)
op_loss.desc.infer_shape(block.desc) op_loss.desc.infer_shape(block.desc)
param_grad_list = append_backward_ops( param_grad_list = append_backward(
loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set) loss=loss, parameter_list=input_to_check, no_grad_set=no_grad_set)
feed_dict = { feed_dict = {

@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
from paddle.v2.fluid.framework import default_main_program from paddle.v2.fluid.framework import default_main_program
import numpy import numpy
@ -64,7 +64,7 @@ class TestArrayReadWrite(unittest.TestCase):
total_sum = layers.sums(input=[a_sum, x_sum]) total_sum = layers.sums(input=[a_sum, x_sum])
total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0) total_sum_scaled = layers.scale(x=total_sum, scale=1 / 6.0)
append_backward_ops(total_sum_scaled) append_backward(total_sum_scaled)
g_vars = map(default_main_program().global_block().var, g_vars = map(default_main_program().global_block().var,
[each_x.name + "@GRAD" for each_x in x]) [each_x.name + "@GRAD" for each_x in x])

@ -3,7 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
from paddle.v2.fluid.framework import default_startup_program, default_main_program from paddle.v2.fluid.framework import default_startup_program, default_main_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy import numpy
@ -26,7 +26,7 @@ class ConditionalBlock(unittest.TestCase):
outs = exe.run(feed={'X': x}, fetch_list=[out])[0] outs = exe.run(feed={'X': x}, fetch_list=[out])[0]
print outs print outs
loss = layers.mean(x=out) loss = layers.mean(x=out)
append_backward_ops(loss=loss) append_backward(loss=loss)
outs = exe.run( outs = exe.run(
feed={'X': x}, feed={'X': x},
fetch_list=[ fetch_list=[

@ -4,7 +4,7 @@ import numpy
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, program_guard from paddle.v2.fluid.framework import Program, program_guard
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestCPULoDTensorArrayOps(unittest.TestCase): class TestCPULoDTensorArrayOps(unittest.TestCase):
@ -170,7 +170,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
mean = layers.mean(x=result) mean = layers.mean(x=result)
append_backward_ops(mean) append_backward(mean)
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)

@ -2,7 +2,7 @@ import unittest
import paddle.v2.fluid.framework as framework import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
class TestOptimizer(unittest.TestCase): class TestOptimizer(unittest.TestCase):
@ -102,7 +102,7 @@ class TestMomentumOptimizer(unittest.TestCase):
dtype="float32", shape=[1], lod_level=0, name="mean.out") dtype="float32", shape=[1], lod_level=0, name="mean.out")
block.append_op( block.append_op(
type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out}) type="mean", inputs={"X": mul_out}, outputs={"Out": mean_out})
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
momentum_optimizer = self.MockMomentum( momentum_optimizer = self.MockMomentum(
learning_rate=learning_rate, momentum=0.2, use_nesterov=True) learning_rate=learning_rate, momentum=0.2, use_nesterov=True)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(momentum_optimizer.get_accumulators()), 0) self.assertEqual(len(momentum_optimizer.get_accumulators()), 0)
opts = momentum_optimizer.create_optimization_pass( opts = momentum_optimizer.create_optimization_pass(
@ -209,7 +209,7 @@ class TestAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adagrad_optimizer = self.MockAdagrad( adagrad_optimizer = self.MockAdagrad(
learning_rate=learning_rate, epsilon=1.0e-6) learning_rate=learning_rate, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(adagrad_optimizer.get_accumulators()), 0)
opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out, opts = adagrad_optimizer.create_optimization_pass(params_grads, mul_out,
@ -269,7 +269,7 @@ class TestAdamOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adam_optimizer = self.MockAdam( adam_optimizer = self.MockAdam(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adam_optimizer.get_accumulators()), 0) self.assertEqual(len(adam_optimizer.get_accumulators()), 0)
opts = adam_optimizer.create_optimization_pass(params_grads, mul_out, opts = adam_optimizer.create_optimization_pass(params_grads, mul_out,
@ -331,7 +331,7 @@ class TestAdamaxOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
adamax_optimizer = self.MockAdamax( adamax_optimizer = self.MockAdamax(
learning_rate=learning_rate, beta1=0.9, beta2=0.999) learning_rate=learning_rate, beta1=0.9, beta2=0.999)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(adamax_optimizer.get_accumulators()), 0) self.assertEqual(len(adamax_optimizer.get_accumulators()), 0)
opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out, opts = adamax_optimizer.create_optimization_pass(params_grads, mul_out,
@ -390,7 +390,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
learning_rate = 0.01 learning_rate = 0.01
decayed_adagrad_optimizer = self.MockDecayedAdagrad( decayed_adagrad_optimizer = self.MockDecayedAdagrad(
learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6) learning_rate=learning_rate, decay=0.95, epsilon=1.0e-6)
params_grads = append_backward_ops(mean_out) params_grads = append_backward(mean_out)
self.assertEqual(len(params_grads), 1) self.assertEqual(len(params_grads), 1)
self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0) self.assertEqual(len(decayed_adagrad_optimizer.get_accumulators()), 0)
opts = decayed_adagrad_optimizer.create_optimization_pass( opts = decayed_adagrad_optimizer.create_optimization_pass(

@ -3,7 +3,7 @@ import unittest
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.framework import Program, grad_var_name from paddle.v2.fluid.framework import Program, grad_var_name
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.backward import append_backward_ops from paddle.v2.fluid.backward import append_backward
import numpy as np import numpy as np
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
@ -177,7 +177,7 @@ class RecurrentOpTest1(unittest.TestCase):
def test_backward(self): def test_backward(self):
self.check_forward() self.check_forward()
append_backward_ops(self.output) append_backward(self.output)
ana_grad = [np.array(x) for x in self.backward()] ana_grad = [np.array(x) for x in self.backward()]

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save