Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_dilation

xzl 8 years ago
commit 47329f6bfc

.gitignore

@@ -28,3 +28,4 @@ cmake_install.cmake
paddle/.timestamp
python/paddlepaddle.egg-info/
paddle/pybind/pybind.h
python/paddle/v2/framework/tests/tmp/*

paddle/framework/CMakeLists.txt
@@ -15,7 +15,7 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc)
cc_library(scope SRCS scope.cc DEPS glog)
cc_test(scope_test SRCS scope_test.cc DEPS scope)

paddle/framework/op_desc.h
@@ -107,6 +107,8 @@ class OpDescBind {
void InferVarType(BlockDescBind *block) const;
void MarkAsTarget() { desc_.set_is_target(true); }
void Flush();
private:

paddle/framework/program_desc.cc
@@ -49,6 +49,13 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
}
}
ProgramDescBind::ProgramDescBind(const ProgramDesc &desc) {
desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc));
}
}
ProgramDescBind::ProgramDescBind(const std::string &binary_str) {
PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
"Fail to parse program_desc from binary string.");

paddle/framework/program_desc.h
@@ -29,6 +29,8 @@ class ProgramDescBind {
public:
ProgramDescBind();
explicit ProgramDescBind(const ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o);
explicit ProgramDescBind(const std::string &binary_str);

paddle/framework/prune.cc
@@ -46,7 +46,7 @@ bool IsTarget(const OpDesc& op_desc) {
return false;
}
void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) {
// TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op
@@ -91,8 +91,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
// we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end());
output = input;
auto* op_field = output.mutable_blocks(block_id)->mutable_ops();
*output = input;
auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
op_field->Clear();
for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) {
@@ -101,7 +101,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
}
}
void Prune(const ProgramDesc& input, ProgramDesc& output) {
// TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
void Prune(const ProgramDesc& input, ProgramDesc* output) {
prune_impl(input, output, 0);
}

paddle/framework/prune.h
@@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc& output);
void Prune(const ProgramDesc& input, ProgramDesc* output);
} // namespace framework
} // namespace paddle

paddle/framework/prune_test.cc
@@ -59,11 +59,11 @@ TEST(Prune, one_operator) {
f::ProgramDesc *pdesc = program.Proto();
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0);
pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true);
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1);
}
@@ -81,7 +81,7 @@ TEST(Prune, forward) {
for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) {
f::ProgramDesc pruned;
pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true);
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1);
}
}
@@ -100,7 +100,7 @@ TEST(Prune, multi_input_op) {
pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4);
}
@@ -116,7 +116,7 @@ TEST(Prune, multi_output_op) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2);
}
@@ -133,6 +133,6 @@ TEST(Prune, multi_target) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned;
Prune(*pdesc, pruned);
Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3);
}

paddle/framework/scope.cc
@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> // for unique_ptr
#include <mutex> // for call_once
#include "glog/logging.h"
#include "paddle/string/printf.h"
namespace paddle {
@@ -23,7 +24,10 @@ namespace framework {
Scope::~Scope() {
DropKids();
for (auto& kv : vars_) delete kv.second;
for (auto& kv : vars_) {
VLOG(3) << "Destroy variable " << kv.first;
delete kv.second;
}
}
Scope& Scope::NewScope() const {
@@ -38,6 +42,7 @@ Variable* Scope::Var(const std::string& name) {
}
Variable* v = new Variable();
vars_[name] = v;
VLOG(3) << "Create variable " << name << " on scope";
v->name_ = &(vars_.find(name)->first);
return v;
}

paddle/memory/CMakeLists.txt
@@ -1,6 +1,6 @@
add_subdirectory(detail)
cc_library(memory SRCS memory.cc)
cc_library(memory SRCS memory.cc DEPS place)
cc_library(memcpy SRCS memcpy.cc)
cc_library(paddle_memory

paddle/memory/detail/meta_cache.cc
@@ -13,6 +13,7 @@
limitations under the License. */
#include "paddle/memory/detail/meta_cache.h"
#include "glog/logging.h"
#include "paddle/memory/detail/memory_block.h"
#include "paddle/platform/assert.h"
@@ -28,7 +29,9 @@ Metadata MetadataCache::load(const MemoryBlock* block) {
PADDLE_ASSERT(existing_metadata->second.check_guards());
return existing_metadata->second;
} else {
PADDLE_ASSERT(reinterpret_cast<const Metadata*>(block)->check_guards());
auto* meta = reinterpret_cast<const Metadata*>(block);
VLOG(3) << "Load MetaData type=" << meta->type;
PADDLE_ASSERT(meta->check_guards());
return *reinterpret_cast<const Metadata*>(block);
}
}

paddle/memory/memory.cc
@@ -39,11 +39,15 @@ BuddyAllocator* GetCPUBuddyAllocator() {
template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
return GetCPUBuddyAllocator()->Alloc(size);
VLOG(3) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size);
VLOG(3) << " pointer=" << p;
return p;
}
template <>
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
VLOG(3) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}

paddle/operators/batch_norm_op.cu
@@ -117,9 +117,6 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
math::SetConstant<platform::GPUPlace, T> functor;
functor(ctx.device_context(), saved_mean, 0);
functor(ctx.device_context(), saved_variance, 0);
// FIXME(qiao) should not set zero self
functor(ctx.device_context(), mean_out, 0);
functor(ctx.device_context(), variance_out, 0);
auto handle = ctx.cuda_device_context().cudnn_handle();
@@ -211,8 +208,15 @@ class BatchNormGradKernel<platform::GPUPlace, T>
mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif
std::vector<int> dims = {N, C, H, W, D};
std::vector<int> strides = {H * W * C * D, 1, W * D * C, D * C, C};
std::vector<int> dims;
std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
dims = {N, C, H, W, D};
strides = {H * W * C * D, 1, W * D * C, D * C, C};
}
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
data_desc_, CudnnDataType<T>::type,
x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data()));
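
A note on the stride fix above: for cudnnSetTensorNdDescriptor, strides[i] is the distance in elements between consecutive indices along dimension i, so NCHW and NHWC storage need different vectors even though dims stays {N, C, H, W, D}. A standalone Python sketch of what the two vectors mean (illustration only; names and sizes are not from the diff):

def flat_offset(index, strides):
    # linear-memory offset of a logical (n, c, h, w, d) index
    return sum(i * s for i, s in zip(index, strides))

N, C, H, W, D = 2, 3, 4, 5, 1
nchw = [C * H * W * D, H * W * D, W * D, D, 1]   # channels outermost
nhwc = [H * W * C * D, 1, W * D * C, D * C, C]   # channels innermost
print(flat_offset((0, 1, 0, 0, 0), nchw))  # 20: one channel step jumps H*W*D
print(flat_offset((0, 1, 0, 0, 0), nhwc))  # 1:  channels are contiguous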

paddle/operators/sequence_pool_op.h
@@ -144,11 +144,11 @@ class SequencePoolGradKernel : public framework::OpKernel<T> {
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
in_t_map(in_t.data<T>(), h, w);
int row_id;
Eigen::array<int, 2> extents = {1, 1};
Eigen::array<int, 2> extents{{1, 1}};
for (int col_id = 0; col_id < w; col_id++) {
in_t_map.col(col_id).maxCoeff(&row_id);
Eigen::array<int, 2> in_offsets = {row_id, col_id};
Eigen::array<int, 2> out_offsets = {0, col_id};
Eigen::array<int, 2> in_offsets{{row_id, col_id}};
Eigen::array<int, 2> out_offsets{{0, col_id}};
in_g_e.slice(in_offsets, extents).device(place) =
out_g_e.slice(out_offsets, extents);
}

paddle/operators/sign_op.cc
@@ -0,0 +1,70 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sign_op.h"
namespace paddle {
namespace operators {
class SignOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of SignOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of SignOp should not be null.");
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
ctx->ShareLoD("X", /*->*/ "Out");
}
};
template <typename AttrType>
class SignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
SignOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X", "(Tensor) Input tensor of sign operator.");
AddOutput("Out", "(Tensor) Output tensor of sign operator.");
AddComment(R"DOC(Sign operator
The equation is: Out = X.sign()
)DOC");
}
};
class SignGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
std::unique_ptr<framework::OpDescBind> Apply() const override {
auto *grad_op = new framework::OpDescBind();
grad_op->SetType("scale");
grad_op->SetInput("X", OutputGrad("Out"));
grad_op->SetOutput("Out", InputGrad("X"));
grad_op->SetAttr("scale", 0.0f);
return std::unique_ptr<framework::OpDescBind>(grad_op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(sign, ops::SignOp, ops::SignOpMaker<float>,
ops::SignGradMaker);
REGISTER_OP_CPU_KERNEL(sign,
ops::SignKernel<paddle::platform::CPUPlace, float>);
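
On SignGradMaker above: sign(x) is piecewise constant, so its derivative is zero almost everywhere, and the backward pass can be encoded as a scale op with scale = 0.0, producing a zero dX shaped like dOut. A standalone NumPy sketch of the semantics (illustration only, not Paddle API):

import numpy as np

x = np.array([-1.5, 0.0, 2.0], dtype=np.float32)
out = np.sign(x)           # forward:  [-1.,  0.,  1.]
d_out = np.ones_like(x)    # some incoming gradient
d_x = 0.0 * d_out          # backward: exactly what scale(0.0) computes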

paddle/operators/sign_op.cu
@@ -0,0 +1,18 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/sign_op.h"
REGISTER_OP_GPU_KERNEL(
sign, paddle::operators::SignKernel<paddle::platform::GPUPlace, float>);

paddle/operators/sign_op.h
@@ -0,0 +1,38 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/eigen.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
template <typename Place, typename T>
class SignKernel : public framework::OpKernel<T> {
public:
virtual void Compute(const framework::ExecutionContext& context) const {
auto* out = context.Output<framework::Tensor>("Out");
auto* in = context.Input<framework::Tensor>("X");
out->mutable_data<T>(in->place());
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*in);
auto& place = context.GetEigenDevice<Place>();
eigen_out.device(place) = eigen_in.sign();
}
};
} // namespace operators
} // namespace paddle

paddle/pybind/CMakeLists.txt
@@ -1,7 +1,7 @@
if(WITH_PYTHON)
cc_library(paddle_pybind SHARED
SRCS pybind.cc exception.cc protobuf.cc
DEPS pybind python backward proto_desc tensor_array paddle_memory executor
DEPS pybind python backward proto_desc tensor_array paddle_memory executor prune
${GLOB_OP_LIB})
endif(WITH_PYTHON)

paddle/pybind/protobuf.cc
@@ -141,6 +141,13 @@ void BindProgramDesc(py::module &m) {
desc->SerializeToString(&res),
"Serialize ProgramDesc Error. This could be a bug of Paddle.");
return res;
})
.def("parse_from_string",
[](ProgramDescBind &program_desc, const std::string &data) {
ProgramDesc *desc = program_desc.Proto();
PADDLE_ENFORCE(desc->ParseFromString(data),
"Fail to parse ProgramDesc from string. This could "
"be a bug of Paddle.");
});
}

paddle/pybind/pybind.cc
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/framework/feed_fetch_method.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/prune.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/operators/cond_op.h"
@@ -237,6 +238,16 @@ All parameter, weight, gradient are variables in Paddle.
}
return ret_values;
});
m.def("prune", [](const ProgramDescBind &origin,
const std::vector<std::array<size_t, 2>> &targets) {
ProgramDescBind prog_with_targets(origin);
for (const auto &t : targets) {
prog_with_targets.Block(t[0])->Op(t[1])->MarkAsTarget();
}
ProgramDesc pruned_desc;
Prune(*prog_with_targets.Proto(), &pruned_desc);
return new ProgramDescBind(pruned_desc);
});
m.def_submodule(
"var_names",
"The module will return special predefined variable name in Paddle")

python/paddle/v2/framework/framework.py
@@ -251,6 +251,8 @@ class Operator(object):
self.desc.set_output(out_proto.name, out_argu_names)
if attrs is not None:
if not isinstance(attrs, dict):
raise TypeError("'attrs' should be a dict.")
for attr in proto.attrs:
attr_name = attr.name
if (not attr_name in attrs) or (attrs[attr_name] is None):
@@ -291,6 +293,14 @@ class Operator(object):
def output_names(self):
return self.desc.output_names()
@property
def idx(self):
for i, op in enumerate(self.block.ops):
if op == self:
return i
raise ValueError(
"Can't find op itself in its block. It could be a bug of Paddle.")
def has_attr(self, name):
return self.desc.has_attr(name)
@@ -440,10 +450,31 @@ class Program(object):
p.sync_with_cpp()
return p
def prune(self, targets):
if not isinstance(targets, list):
targets = [targets]
targets_idx = []
for t in targets:
if not isinstance(t, Operator):
if isinstance(t, Variable):
t = t.op
else:
raise ValueError(
"All targets of prune() can only be Variable or Operator."
)
targets_idx.append([t.block.idx, t.idx])
res = Program()
res.desc = core.prune(self.desc, targets_idx)
res.blocks = [Block(res, i) for i in xrange(res.desc.num_blocks())]
res.sync_with_cpp()
return res
@staticmethod
def parse_from_string(binary_str):
p = Program()
p.desc = core.ProgramDesc(binary_str)
p.blocks = [Block(p, i) for i in xrange(p.desc.num_blocks())]
p.sync_with_cpp()
return p
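
Taken together, the two new Program methods support a prune/serialize/deserialize round trip. A hedged usage sketch (`program` and `prediction` are hypothetical objects from a previously built network):

inference_program = program.prune([prediction])   # Variables and/or Operators
binary_str = inference_program.desc.serialize_to_string()
clone = Program.parse_from_string(binary_str)     # rebuilt from the proto bytes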

python/paddle/v2/framework/io.py
@@ -1,11 +1,12 @@
import os
import cPickle as pickle
from paddle.v2.framework.framework import Program, Parameter, g_program, \
Variable
__all__ = [
'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
'load_persistables'
'load_persistables', "save_inference_model", "load_inference_model"
]
@@ -124,6 +125,7 @@ def load_vars(executor, dirname, program=None, vars=None, predicate=None):
inputs={},
outputs={"Out": [new_var]},
attrs={'file_path': os.path.join(dirname, new_var.name)})
executor.run(load_prog)
@@ -141,3 +143,88 @@ def load_persistables(executor, dirname, program=None):
"""
load_vars(
executor, dirname=dirname, program=program, predicate=is_persistable)
def save_inference_model(dirname,
feeded_var_names,
target_vars,
executor,
program=None):
"""
Build a program specialized for inference,
and save it to the given directory via the executor.
:param dirname: directory path
:param feeded_var_names: names of variables that need to be fed data during inference
:param target_vars: Variables from which we can get inference results.
:param executor: executor that saves the inference model
:param program: original program, which will be pruned to build the inference model.
Default g_program.
:return: None
"""
if program is None:
program = g_program
if not isinstance(target_vars, list):
target_vars = [target_vars]
if not os.path.isdir(dirname):
os.makedirs(dirname)
pruned_program = program.prune(target_vars)
fetch_var_names = [v.name for v in target_vars]
model_file_name = dirname + "/__model__"
with open(model_file_name, "w") as f:
pickle.dump({
"program_desc_str": pruned_program.desc.serialize_to_string(),
"feed_var_names": feeded_var_names,
"fetch_var_names": fetch_var_names
}, f, -1)
save_params(executor, dirname, program)
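
A hedged usage sketch of save_inference_model (`exe` and `prediction` are hypothetical objects, and the feed name "x" is made up): it prunes the default program down to `prediction`, pickles the program bytes plus feed/fetch names into <dirname>/__model__, and saves the parameters alongside.

save_inference_model("./model_dir", ["x"], [prediction], exe)
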
def load_persistables_if_exist(executor, dirname, program=None):
filenames = next(os.walk(dirname))[2]
filenames = set(filenames)
def _is_persistable_and_exist_(var):
if not is_persistable(var):
return False
else:
return var.name in filenames
load_vars(
executor,
dirname,
program=program,
vars=None,
predicate=_is_persistable_and_exist_)
def load_inference_model(dirname, executor):
"""
Load inference model from a directory
:param dirname: directory path
:param executor: executor that loads the inference model
:return: [program, feed_var_names, fetch_vars]
program: program specialized for inference.
feed_var_names: names of variables that need to be fed data
fetch_vars: variables from which we can get inference results.
"""
if not os.path.isdir(dirname):
raise ValueError("There is no directory named '%s'", dirname)
model_file_name = dirname + "/__model__"
model = pickle.load(open(model_file_name, "r"))
program_desc_str = model["program_desc_str"]
feed_var_names = model["feed_var_names"]
fetch_var_names = model["fetch_var_names"]
program = Program.parse_from_string(program_desc_str)
load_persistables_if_exist(executor, dirname, program)
fetch_vars = [program.global_block().var(name) for name in fetch_var_names]
return [program, feed_var_names, fetch_vars]
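
And the counterpart, restoring what the save_inference_model sketch above persisted (again with hypothetical names; feed/run plumbing elided):

[infer_prog, feed_names, fetch_vars] = load_inference_model("./model_dir", exe)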

python/paddle/v2/framework/layer_helper.py
@@ -131,12 +131,14 @@ class LayerHelper(object):
return dtype
def create_parameter(self, attr, shape, dtype, suffix='w'):
if attr['name'] is None:
attr['name'] = unique_name(".".join([self.name, suffix]))
# Deepcopy the attr so that parameters can be shared in program
attr_copy = copy.deepcopy(attr)
if attr_copy['name'] is None:
attr_copy['name'] = unique_name(".".join([self.name, suffix]))
self.init_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr)
dtype=dtype, shape=shape, **attr_copy)
return self.program.global_block().create_parameter(
name=attr['name'], dtype=dtype, shape=shape)
name=attr_copy['name'], dtype=dtype, shape=shape)
def create_tmp_variable(self, dtype):
return self.program.current_block().create_var(

python/paddle/v2/framework/optimizer.py
@@ -18,7 +18,8 @@ class Optimizer(object):
but need to use one of its implementations.
"""
def __init__(self):
def __init__(self, global_step=None):
self._global_step = global_step
# Dictionary of accumulators. Some optimizer subclasses need to
# allocate and manage extra variables associated with the parameters
# to train. These variables are called accumulators.
@@ -109,6 +110,26 @@ class Optimizer(object):
format(name, param.name))
return self._accumulators[name][param.name]
def _increment_global_step(self, block):
"""Increment the global step by 1 after every iteration
Args:
block: the block in which the loss variable is present
Returns:
the increment op for the global step
"""
assert isinstance(block, framework.Block)
assert self._global_step is not None
# create the increment op
increment_op = block.append_op(
type="increment",
inputs={"X": self._global_step},
outputs={"Out": self._global_step},
attrs={"step": 1.0})
return increment_op
def create_optimization_pass(self, parameters_and_grads, loss):
"""Add optimization operators to update gradients to variables.
@@ -152,6 +173,8 @@ class Optimizer(object):
if finish_ops is not None:
return_ops += finish_ops
if self._global_step is not None:
return_ops.append(self._increment_global_step(loss.block))
return return_ops
def minimize(self, loss, parameter_list=None, no_grad_set=None):
@@ -172,9 +195,9 @@ class SGDOptimizer(Optimizer):
""" Simple SGD optimizer without any state.
"""
def __init__(self, learning_rate):
def __init__(self, learning_rate, global_step=None):
assert learning_rate is not None
super(SGDOptimizer, self).__init__()
super(SGDOptimizer, self).__init__(global_step)
self.type = "sgd"
self._learning_rate = learning_rate
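
A hedged sketch of the new global_step plumbing (the variable name and `loss` are hypothetical): minimize() now appends an increment op with step = 1.0 after the parameter updates.

from paddle.v2.framework.framework import g_program
from paddle.v2.framework.optimizer import SGDOptimizer

# counter lives in the global block so every iteration updates the same var
counter = g_program.global_block().create_var(
    name="global_step", shape=[1], dtype="float32")
sgd = SGDOptimizer(learning_rate=0.01, global_step=counter)
opts = sgd.minimize(loss)  # last op in opts increments `counter` by 1
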
@@ -215,10 +238,14 @@ class MomentumOptimizer(Optimizer):
"""
_velocity_acc_str = "velocity"
def __init__(self, learning_rate, momentum, use_nesterov=False):
def __init__(self,
learning_rate,
momentum,
use_nesterov=False,
global_step=None):
assert learning_rate is not None
assert momentum is not None
super(MomentumOptimizer, self).__init__()
super(MomentumOptimizer, self).__init__(global_step)
self.type = "momentum"
self._learning_rate = learning_rate
self._momentum = momentum
@@ -275,10 +302,10 @@ class AdagradOptimizer(Optimizer):
"""
_moment_acc_str = "moment"
def __init__(self, learning_rate, epsilon=1.0e-6):
def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None):
assert learning_rate is not None
assert epsilon is not None
super(AdagradOptimizer, self).__init__()
super(AdagradOptimizer, self).__init__(global_step)
self.type = "adagrad"
self._learning_rate = learning_rate
self._epsilon = epsilon
@@ -337,12 +364,13 @@ class AdamOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None
assert epsilon is not None
super(AdamOptimizer, self).__init__()
super(AdamOptimizer, self).__init__(global_step)
self.type = "adam"
self._learning_rate = learning_rate
self._beta1 = beta1
@@ -458,7 +486,8 @@ class AdamaxOptimizer(Optimizer):
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-8):
epsilon=1e-8,
global_step=None):
assert learning_rate is not None
assert beta1 is not None
assert beta2 is not None

python/paddle/v2/framework/regularizer.py
@@ -1,6 +1,8 @@
import paddle.v2.framework.framework as framework
__all__ = ['append_regularization_ops', 'L2DecayRegularizer']
__all__ = [
'append_regularization_ops', 'L2DecayRegularizer', 'L1DecayRegularizer'
]
def append_regularization_ops(parameters_and_grads):
@@ -97,3 +99,43 @@ class L2DecayRegularizer(WeightDecayRegularizer):
attrs={"scale": self._regularization_coeff})
return decay
class L1DecayRegularizer(WeightDecayRegularizer):
"""Implements the L1 Weight Decay Regularization
"""
def __init__(self, regularization_coeff=0.0):
assert regularization_coeff is not None
super(L1DecayRegularizer, self).__init__()
self._regularization_coeff = regularization_coeff
def __call__(self, param, block):
"""Add L1 weight decay ops to network
Adds L1 weight decay ops.
L1WeightDecay = reg_coeff * sign(parameter)
Args:
param: parameter variable for which regularization is applied
block: block in which variable is to be created
Returns:
new variable for weight decay
"""
assert isinstance(param, framework.Parameter)
assert isinstance(block, framework.Block)
decay = block.create_var(
dtype="float32", shape=param.shape, lod_level=param.lod_level)
# Append sign op
block.append_op(
type='sign', inputs={"X": param}, outputs={"Out": decay})
# Append scale op to the output of sign op
block.append_op(
type='scale',
inputs={"X": decay},
outputs={"Out": decay},
attrs={"scale": self._regularization_coeff})
return decay
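
A standalone NumPy check of the quantity the two appended ops compute, decay = reg_coeff * sign(param) (illustration only):

import numpy as np

param = np.array([0.5, -2.0, 0.0], dtype=np.float32)
reg_coeff = 0.1
decay = reg_coeff * np.sign(param)   # [ 0.1, -0.1,  0. ]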

Some files were not shown because too many files have changed in this diff.