Merge branch 'develop' into fix_rendering_error_of_transpose_op

fix-profile-doc-typo
ying 7 years ago
commit c6b78e56b1

@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/framework/op_registry.h"
#include "paddle/platform/place.h"
DECLARE_bool(do_memory_benchmark);
DEFINE_bool(check_nan_inf, false,
"Checking whether operator produce NAN/INF or not. It will be "
"extremely slow so please use this flag wisely.");
@ -117,6 +118,10 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
VLOG(3) << op->DebugStringEx(local_scope);
op->Run(*local_scope, place_);
if (FLAGS_do_memory_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
}
if (FLAGS_check_nan_inf) {
for (auto& vname : op->OutputVars(true)) {
auto* var = local_scope->FindVar(vname);
@ -130,6 +135,12 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
if (create_vars && create_local_scope) {
scope->DeleteScope(local_scope);
}
if (FLAGS_do_memory_benchmark) {
VLOG(2) << "-------------------------------------------------------";
VLOG(2) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_);
VLOG(2) << "-------------------------------------------------------";
}
}
} // namespace framework

@ -20,6 +20,10 @@ limitations under the License. */
#include "paddle/framework/threadpool.h"
#include "paddle/string/printf.h"
DEFINE_bool(do_memory_benchmark, false,
"Doing memory benchmark. It will make deleting scope synchronized, "
"and add some memory usage logs");
namespace paddle {
namespace framework {
@ -88,8 +92,12 @@ void Scope::DeleteScope(Scope* scope) {
auto it = std::find(this->kids_.begin(), this->kids_.end(), scope);
PADDLE_ENFORCE(it != this->kids_.end(), "Cannot find %p as kid scope", scope);
this->kids_.erase(it);
// Make delete async.
// When making memory benchmark on Fluid, we have to delete scope sync.
if (FLAGS_do_memory_benchmark) {
delete scope;
} else {
Async([scope] { delete scope; });
}
}
void Scope::Rename(const std::string& origin_name,

@ -65,14 +65,19 @@ bool PriorBoxLayer::init(const LayerMap& layerMap,
std::copy(pbConf.aspect_ratio().begin(),
pbConf.aspect_ratio().end(),
std::back_inserter(tmp));
// flip
int inputRatioLength = tmp.size();
for (int index = 0; index < inputRatioLength; index++) {
aspectRatio_.push_back(tmp[index]);
aspectRatio_.push_back(1 / tmp[index]);
if (maxSize_.size() > 0) CHECK_EQ(minSize_.size(), maxSize_.size());
// flip aspect ratios
for (int index = 0; index < tmp.size(); index++) {
real ar = tmp[index];
if (fabs(ar - 1.) < 1e-6) continue;
aspectRatio_.push_back(ar);
aspectRatio_.push_back(1. / ar);
}
numPriors_ = aspectRatio_.size();
if (maxSize_.size() > 0) numPriors_++;
numPriors_ = aspectRatio_.size() * minSize_.size() + maxSize_.size();
return true;
}
@ -99,24 +104,26 @@ void PriorBoxLayer::forward(PassType passType) {
for (int w = 0; w < layerWidth; ++w) {
real centerX = (w + 0.5) * stepW;
real centerY = (h + 0.5) * stepH;
real minSize = 0;
for (size_t s = 0; s < minSize_.size(); s++) {
// first prior.
minSize = minSize_[s];
real minSize = minSize_[s];
real boxWidth = minSize;
real boxHeight = minSize;
// xmin, ymin, xmax, ymax.
// priors with different aspect ratios
for (size_t r = 0; r < aspectRatio_.size(); r++) {
real ar = aspectRatio_[r];
boxWidth = minSize * sqrt(ar);
boxHeight = minSize / sqrt(ar);
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
// set the variance.
for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
}
if (maxSize_.size() > 0) {
CHECK_EQ(minSize_.size(), maxSize_.size());
// second prior.
for (size_t s = 0; s < maxSize_.size(); s++) {
// square prior with size sqrt(minSize * maxSize)
real maxSize = maxSize_[s];
boxWidth = boxHeight = sqrt(minSize * maxSize);
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
@ -128,21 +135,8 @@ void PriorBoxLayer::forward(PassType passType) {
}
}
}
// rest of priors.
for (size_t r = 0; r < aspectRatio_.size(); r++) {
real ar = aspectRatio_[r];
if (fabs(ar - 1.) < 1e-6) continue;
real boxWidth = minSize * sqrt(ar);
real boxHeight = minSize / sqrt(ar);
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
tmpPtr[idx++] = (centerX + boxWidth / 2.) / imageWidth;
tmpPtr[idx++] = (centerY + boxHeight / 2.) / imageHeight;
// set the variance.
for (int t = 0; t < 4; t++) tmpPtr[idx++] = variance_[t];
}
}
}
// clip the prior's coordinate such that it is within [0, 1]
for (int d = 0; d < dim * 2; ++d)
if ((d % 8) < 4)

@ -39,6 +39,11 @@ N-dim tensor. X and Y could be any type. The each element of the Out tensor is
calculated by %s
)DOC",
comment.type, comment.equation));
AddAttr<int>("axis",
"(int, default -1). The start dimension index "
"for broadcasting Y onto X.")
.SetDefault(-1)
.EqualGreaterThan(-1);
}
};
@ -95,11 +100,5 @@ REGISTER_LOGICAL_OP(less_than, "Out = X < Y");
REGISTER_LOGICAL_KERNEL(less_than, CPU, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_OP(less_equal, "Out = X <= Y");
REGISTER_LOGICAL_KERNEL(less_equal, CPU, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_OP(greater_than, "Out = X > Y");
REGISTER_LOGICAL_KERNEL(greater_than, CPU,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_OP(greater_equal, "Out = X >= Y");
REGISTER_LOGICAL_KERNEL(greater_equal, CPU,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_OP(equal, "Out = X == Y");
REGISTER_LOGICAL_KERNEL(equal, CPU, paddle::operators::EqualFunctor);

@ -16,8 +16,4 @@ limitations under the License. */
REGISTER_LOGICAL_KERNEL(less_than, CUDA, paddle::operators::LessThanFunctor);
REGISTER_LOGICAL_KERNEL(less_equal, CUDA, paddle::operators::LessEqualFunctor);
REGISTER_LOGICAL_KERNEL(greater_than, CUDA,
paddle::operators::GreaterThanFunctor);
REGISTER_LOGICAL_KERNEL(greater_equal, CUDA,
paddle::operators::GreaterEqualFunctor);
REGISTER_LOGICAL_KERNEL(equal, CUDA, paddle::operators::EqualFunctor);

@ -16,6 +16,7 @@ limitations under the License. */
#include <math.h>
#include <type_traits>
#include "paddle/framework/op_registry.h"
#include "paddle/operators/elementwise_op_function.h"
#include "paddle/platform/transform.h"
namespace paddle {
@ -33,18 +34,6 @@ struct LessEqualFunctor {
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a <= b; }
};
template <typename T>
struct GreaterThanFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a > b; }
};
template <typename T>
struct GreaterEqualFunctor {
using ELEM_TYPE = T;
HOSTDEVICE bool operator()(const T& a, const T& b) const { return a >= b; }
};
template <typename T>
struct EqualFunctor {
using ELEM_TYPE = T;
@ -65,14 +54,7 @@ class CompareOpKernel
public:
void Compute(const framework::ExecutionContext& context) const override {
using T = typename Functor::ELEM_TYPE;
auto* x = context.Input<framework::Tensor>("X");
auto* y = context.Input<framework::Tensor>("Y");
auto* out = context.Output<framework::Tensor>("Out");
Functor binary_func;
platform::Transform<DeviceContext> trans;
trans(context.template device_context<DeviceContext>(), x->data<T>(),
x->data<T>() + x->numel(), y->data<T>(),
out->mutable_data<bool>(context.GetPlace()), binary_func);
ElementwiseComputeEx<Functor, DeviceContext, T, bool>(context);
}
};

@ -176,14 +176,15 @@ class MidWiseTransformIterator<T, platform::CUDADeviceContext>
};
#endif
template <typename Functor, typename T, typename DeviceContext>
template <typename Functor, typename T, typename DeviceContext,
typename OutType = T>
class TransformFunctor {
public:
TransformFunctor(const framework::Tensor* x, const framework::Tensor* y,
framework::Tensor* z, const DeviceContext& ctx, Functor func)
: x_(x->data<T>()),
y_(y->data<T>()),
z_(z->mutable_data<T>(ctx.GetPlace())),
z_(z->mutable_data<OutType>(ctx.GetPlace())),
nx_(x->numel()),
ctx_(ctx),
func_(func) {}
@ -208,7 +209,7 @@ class TransformFunctor {
private:
const T* x_;
const T* y_;
T* z_;
OutType* z_;
int64_t nx_;
const DeviceContext& ctx_;
Functor func_;
@ -364,15 +365,16 @@ void ElementwiseGradCompute(const framework::ExecutionContext& ctx) {
}
}
template <typename Functor, typename DeviceContext, typename T>
template <typename Functor, typename DeviceContext, typename T,
typename OutType = T>
void ElementwiseComputeEx(const framework::ExecutionContext& ctx) {
using Tensor = framework::Tensor;
auto* x = ctx.Input<Tensor>("X");
auto* y = ctx.Input<Tensor>("Y");
auto* z = ctx.Output<Tensor>("Out");
z->mutable_data<T>(ctx.GetPlace());
TransformFunctor<Functor, T, DeviceContext> functor(
z->mutable_data<OutType>(ctx.GetPlace());
TransformFunctor<Functor, T, DeviceContext, OutType> functor(
x, y, z, ctx.template device_context<DeviceContext>(), Functor());
auto x_dims = x->dims();

@ -0,0 +1,70 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "sampler.h"
namespace paddle {
namespace random {
Sampler::~Sampler() {}
UniformSampler::UniformSampler(int64 range)
: Sampler(range), inv_range_(1.0 / range) {
random_engine_ = std::make_shared<std::mt19937>(seed_);
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
}
UniformSampler::UniformSampler(int64 range, unsigned int seed)
: Sampler(range, seed), inv_range_(1.0 / range) {
random_engine_ = std::make_shared<std::mt19937>(seed_);
dist_ = std::make_shared<std::uniform_int_distribution<>>(0, range);
}
int64 UniformSampler::Sample() const { return (*dist_)(*random_engine_); }
float UniformSampler::Probability(int64 value) const { return inv_range_; }
LogUniformSampler::LogUniformSampler(int64 range)
: Sampler(range), log_range_(log(range + 1)) {
random_engine_ = std::make_shared<std::mt19937>(seed_);
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
}
LogUniformSampler::LogUniformSampler(int64 range, unsigned int seed)
: Sampler(range, seed), log_range_(log(range + 1)) {
random_engine_ = std::make_shared<std::mt19937>(seed_);
dist_ = std::make_shared<std::uniform_real_distribution<>>(0, 1);
}
int64 LogUniformSampler::Sample() const {
// Got Log Uniform distribution from uniform distribution by
// inverse_transform_sampling method
// More details:
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler/
const int64 value =
static_cast<int64>(exp((*dist_)(*random_engine_) * log_range_)) - 1;
// Mathematically, value should be <= range_, but might not be due to some
// floating point roundoff, so we mod by range_.
return value % range_;
}
float LogUniformSampler::Probability(int64 value) const {
// Given f(x) = 1/[(x+1) * log_range_]
// The value's probability is integral of f(x) from value to (value + 1)
// More details:
// https://wanghaoshuang.github.io/2017/11/Log-uniform-distribution-sampler
return (log((value + 2.0) / (value + 1.0))) / log_range_;
}
} // namespace random
} // namespace paddle

@ -0,0 +1,100 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <random>
typedef long int64;
namespace paddle {
namespace operators {
namespace math {
// TODO(wanghaoshuang): Support for GPU
/**
 * Abstract base class for samplers that draw integers from [0, range).
 *
 * Concrete samplers implement Sample() and Probability(). The base class only
 * stores the (strictly positive) range and the seed that subclasses use to
 * initialise their random engines.
 */
class Sampler {
 public:
  // Validates that range > 0 and picks a seed from std::random_device.
  explicit Sampler(int64 range) : range_(range) {
    PADDLE_ENFORCE_GT(range, 0);
    std::random_device r;
    seed_ = r();
  }

  // Validates that range > 0 and uses the caller-supplied seed.
  explicit Sampler(int64 range, unsigned int seed)
      : range_(range), seed_(seed) {
    PADDLE_ENFORCE_GT(range, 0);
  }

  virtual ~Sampler();

  // Sample a single value
  virtual int64 Sample() const = 0;

  // The probability that a single call to Sample() returns the given value.
  virtual float Probability(int64 value) const = 0;

  // Exclusive upper bound of the sampled interval. Const-qualified so it can
  // be called on const samplers, like Sample() and Probability().
  int64 range() const { return range_; }

 protected:
  const int64 range_;
  unsigned int seed_;
};
/**
* Sample integers from [0, range).
* And the distribution function is:
* P(x) = 1 / range
*
* Both constructors forward to the matching Sampler constructor, so the
* one-argument form gets its seed from the base class and the two-argument
* form uses the caller-supplied seed.
*/
class UniformSampler : public Sampler {
public:
explicit UniformSampler(int64 range);
explicit UniformSampler(int64 range, unsigned int seed);
~UniformSampler() override {}
// Draws one value from the integer distribution using the owned engine.
int64 Sample() const override;
// Returns the cached 1 / range; the result does not depend on `value`.
float Probability(int64 value) const override;
private:
// Reciprocal of the range, computed once in the constructors.
const float inv_range_;
// Held through shared_ptr, so the const Sample() can still advance them.
std::shared_ptr<std::mt19937_64> random_engine_;
std::shared_ptr<std::uniform_int_distribution<>> dist_;
};
/**
* Sample integers from [0, range).
* And the distribution function is:
* P(x) = (1/ln(range+1)) * ln(1 + 1/(x + 1))
*
* Both constructors forward to the matching Sampler constructor, so the
* one-argument form gets its seed from the base class and the two-argument
* form uses the caller-supplied seed.
*/
class LogUniformSampler : public Sampler {
public:
explicit LogUniformSampler(int64 range);
explicit LogUniformSampler(int64 range, unsigned int seed);
~LogUniformSampler() override {}
// Draws one value by transforming a uniform real sample (see sampler.cc).
int64 Sample() const override;
// Returns ln((value + 2) / (value + 1)) / ln(range + 1).
float Probability(int64 value) const override;
private:
// ln(range + 1), computed once in the constructors.
const float log_range_;
// Held through shared_ptr, so the const Sample() can still advance them.
std::shared_ptr<std::mt19937_64> random_engine_;
std::shared_ptr<std::uniform_real_distribution<>> dist_;
};
} // math
} // namespace operators
} // namespace paddle

@ -24,11 +24,23 @@ import conll05
import uci_housing
import sentiment
import wmt14
import wmt16
import mq2007
import flowers
import voc2012
__all__ = [
'mnist', 'imikolov', 'imdb', 'cifar', 'movielens', 'conll05', 'sentiment'
'uci_housing', 'wmt14', 'mq2007', 'flowers', 'voc2012'
'mnist',
'imikolov',
'imdb',
'cifar',
'movielens',
'conll05',
# The trailing comma matters: without it Python concatenates this literal with
# the next one into the single bogus name 'sentimentuci_housing'.
'sentiment',
'uci_housing',
'wmt14',
'wmt16',
'mq2007',
'flowers',
'voc2012',
]

@ -25,8 +25,12 @@ import glob
import cPickle as pickle
__all__ = [
'DATA_HOME', 'download', 'md5file', 'split', 'cluster_files_reader',
'convert'
'DATA_HOME',
'download',
'md5file',
'split',
'cluster_files_reader',
'convert',
]
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
@ -58,12 +62,15 @@ def md5file(fname):
return hash_md5.hexdigest()
def download(url, module_name, md5sum):
def download(url, module_name, md5sum, save_name=None):
dirname = os.path.join(DATA_HOME, module_name)
if not os.path.exists(dirname):
os.makedirs(dirname)
filename = os.path.join(dirname, url.split('/')[-1])
filename = os.path.join(dirname,
url.split('/')[-1]
if save_name is None else save_name)
retry = 0
retry_limit = 3
while not (os.path.exists(filename) and md5file(filename) == md5sum):
@ -196,9 +203,11 @@ def convert(output_path, reader, line_count, name_prefix):
Convert data from reader to recordio format files.
:param output_path: directory in which output files will be saved.
:param reader: a data reader, from which the convert program will read data instances.
:param reader: a data reader, from which the convert program will read
data instances.
:param name_prefix: the name prefix of generated files.
:param max_lines_to_shuffle: the max lines numbers to shuffle before writing.
:param max_lines_to_shuffle: the max lines numbers to shuffle before
writing.
"""
assert line_count >= 1

@ -0,0 +1,66 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2.dataset.wmt16
import unittest
class TestWMT16(unittest.TestCase):
    """Smoke tests for the WMT16 readers and the dictionary loader."""

    def checkout_one_sample(self, sample):
        """Check the layout of one sample and its start/end marks."""
        # Every sample has 3 fields: source language word indices,
        # target language word indices, and target next word indices.
        self.assertEqual(len(sample), 3)
        source_ids, target_ids, target_next_ids = sample

        # The source sequence begins with the start mark (0) and finishes
        # with the end mark (1).
        self.assertEqual(source_ids[0], 0)
        self.assertEqual(source_ids[-1], 1)

        # The target sequence begins with the start mark.
        self.assertEqual(target_ids[0], 0)

        # The target next-word sequence finishes with the end mark.
        self.assertEqual(target_next_ids[-1], 1)

    def _check_leading_samples(self, reader, limit=10):
        """Validate the first `limit` samples produced by `reader`."""
        for position, sample in enumerate(reader()):
            if position >= limit:
                break
            self.checkout_one_sample(sample)

    def test_train(self):
        self._check_leading_samples(
            paddle.v2.dataset.wmt16.train(
                src_dict_size=100000, trg_dict_size=100000))

    def test_test(self):
        self._check_leading_samples(
            paddle.v2.dataset.wmt16.test(
                src_dict_size=1000, trg_dict_size=1000))

    def test_val(self):
        self._check_leading_samples(
            paddle.v2.dataset.wmt16.validation(
                src_dict_size=1000, trg_dict_size=1000))

    def test_get_dict(self):
        """The reversed dictionary maps the first ids to the special tokens."""
        dict_size = 1000
        word_dict = paddle.v2.dataset.wmt16.get_dict("en", dict_size, True)
        self.assertEqual(len(word_dict), dict_size)
        expected_specials = ("<s>", "<e>", "<unk>")
        for word_id, token in enumerate(expected_specials):
            self.assertEqual(word_dict[word_id], token)
if __name__ == "__main__":
unittest.main()

@ -25,12 +25,20 @@ import gzip
import paddle.v2.dataset.common
from paddle.v2.parameters import Parameters
__all__ = ['train', 'test', 'build_dict', 'convert']
URL_DEV_TEST = 'http://www-lium.univ-lemans.fr/~schwenk/cslm_joint_paper/data/dev+test.tgz'
__all__ = [
'train',
'test',
'get_dict',
'convert',
]
URL_DEV_TEST = ('http://www-lium.univ-lemans.fr/~schwenk/'
'cslm_joint_paper/data/dev+test.tgz')
MD5_DEV_TEST = '7d7897317ddd8ba0ae5c5fa7248d3ff5'
# this is a small set of data for test. The original data is too large and will be add later.
URL_TRAIN = 'http://paddlepaddle.cdn.bcebos.com/demo/wmt_shrinked_data/wmt14.tgz'
# This is a small subset of the data for testing. The original data is too
# large and will be added later.
URL_TRAIN = ('http://paddlepaddle.cdn.bcebos.com/demo/'
'wmt_shrinked_data/wmt14.tgz')
MD5_TRAIN = '0791583d57d5beb693b9414c5b36798c'
# BLEU of this trained model is 26.92
URL_MODEL = 'http://paddlepaddle.bj.bcebos.com/demo/wmt_14/wmt14_model.tar.gz'
@ -42,8 +50,8 @@ UNK = "<unk>"
UNK_IDX = 2
def __read_to_dict__(tar_file, dict_size):
def __to_dict__(fd, size):
def __read_to_dict(tar_file, dict_size):
def __to_dict(fd, size):
out_dict = dict()
for line_count, line in enumerate(fd):
if line_count < size:
@ -58,19 +66,19 @@ def __read_to_dict__(tar_file, dict_size):
if each_item.name.endswith("src.dict")
]
assert len(names) == 1
src_dict = __to_dict__(f.extractfile(names[0]), dict_size)
src_dict = __to_dict(f.extractfile(names[0]), dict_size)
names = [
each_item.name for each_item in f
if each_item.name.endswith("trg.dict")
]
assert len(names) == 1
trg_dict = __to_dict__(f.extractfile(names[0]), dict_size)
trg_dict = __to_dict(f.extractfile(names[0]), dict_size)
return src_dict, trg_dict
def reader_creator(tar_file, file_name, dict_size):
def reader():
src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
with tarfile.open(tar_file, mode='r') as f:
names = [
each_item.name for each_item in f
@ -152,7 +160,7 @@ def get_dict(dict_size, reverse=True):
# if reverse = False, return dict = {'a':'001', 'b':'002', ...}
# else reverse = true, return dict = {'001':'a', '002':'b', ...}
tar_file = paddle.v2.dataset.common.download(URL_TRAIN, 'wmt14', MD5_TRAIN)
src_dict, trg_dict = __read_to_dict__(tar_file, dict_size)
src_dict, trg_dict = __read_to_dict(tar_file, dict_size)
if reverse:
src_dict = {v: k for k, v in src_dict.items()}
trg_dict = {v: k for k, v in trg_dict.items()}

File diff suppressed because it is too large Load Diff

@ -37,6 +37,7 @@ import clip
from memory_optimization_transpiler import memory_optimize
Tensor = LoDTensor
__all__ = framework.__all__ + executor.__all__ + [
'io',
'initializer',
@ -85,7 +86,9 @@ def __bootstrap__():
os.environ['OMP_NUM_THREADS'] = str(num_threads)
read_env_flags = ['use_pinned_memory', 'check_nan_inf']
read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'do_memory_benchmark'
]
if core.is_compile_gpu():
read_env_flags += ['fraction_of_gpu_memory_to_use', 'op_sync']
core.init_gflags([sys.argv[0]] +
@ -94,4 +97,5 @@ def __bootstrap__():
core.init_devices()
layers.monkey_patch_variable()
__bootstrap__()

@ -14,6 +14,7 @@
import functools
import layers
import framework
from . import core
__all__ = [
@ -66,7 +67,7 @@ def error_clip_callback(block, context):
class BaseGradientClipAttr(object):
def process_context(self, context, p_g):
def process_context(self, context, param, grad):
raise NotImplementedError()
def create_operators(self, param, grad):
@ -74,7 +75,7 @@ class BaseGradientClipAttr(object):
class NullGradientClipAttr(BaseGradientClipAttr):
def process_context(self, context, p_g):
def process_context(self, context, param, grad):
pass
def create_operators(self, param, grad):
@ -91,7 +92,7 @@ class GradientClipByValue(BaseGradientClipAttr):
self.max = max
self.min = min
def process_context(self, context, p_g):
def process_context(self, context, param, grad):
pass
def create_operators(self, param, grad):
@ -99,19 +100,93 @@ class GradientClipByValue(BaseGradientClipAttr):
return param, new_grad
class GradientClipByNorm(BaseGradientClipAttr):
    """Clip attribute that passes each gradient through `layers.clip_by_norm`."""

    def __init__(self, clip_norm):
        # Forwarded as `max_norm` to layers.clip_by_norm.
        self.clip_norm = clip_norm

    def process_context(self, context, param, grad):
        """Nothing to record in the shared context for this clip kind."""
        return None

    def create_operators(self, param, grad):
        """Return `param` paired with the norm-clipped gradient."""
        clipped_grad = layers.clip_by_norm(x=grad, max_norm=self.clip_norm)
        return param, clipped_grad
class GradientClipByGlobalNorm(BaseGradientClipAttr):
def __init__(self, clip_norm, group_name="default_group"):
if not isinstance(group_name, basestring):
raise TypeError("'group_name' must be a basestring.")
self.clip_norm = clip_norm
self.group_name = group_name
def process_context(self, context, param, grad):
if self.group_name not in context:
context[self.group_name] = []
context[self.group_name + "_clip_value"] = self.clip_norm
context[self.group_name + "_clip"] = layers.fill_constant(
shape=[1], dtype="float32", value=self.clip_norm)
else:
if not self.clip_norm == context[self.group_name + "_clip_value"]:
raise ValueError(
"All parameters' 'clip_norm' of a same group should be the same"
)
local_norm_var = layers.reduce_sum(input=layers.pow(x=grad, factor=2.0))
context[self.group_name].append(local_norm_var)
self.context = context
def create_operators(self, param, grad):
group_scale_name = self.group_name + "_scale"
if group_scale_name not in self.context:
group_norm_var = layers.sums(input=self.context[self.group_name])
layers.sqrt(x=group_norm_var, out=group_norm_var)
clip_var = self.context[self.group_name + "_clip"]
group_scale_var = layers.elementwise_div(
x=clip_var,
y=layers.elementwise_max(
x=clip_var, y=group_norm_var))
assert group_scale_var.shape == (1L, )
self.context[group_scale_name] = group_scale_var
new_grad = layers.elementwise_mul(
x=grad, y=self.context[group_scale_name])
return param, new_grad
def gradient_clip_by_global_norm(clip_norm,
                                 param_list=None,
                                 group_name="default_group",
                                 program=None):
    """Attach a GradientClipByGlobalNorm attribute to a set of parameters.

    Args:
        clip_norm: clip norm handed to every GradientClipByGlobalNorm.
        param_list: Parameters, or parameter names looked up in block 0 of
            `program`. Defaults to all parameters of block 0.
        group_name: name of the clipping group the parameters join.
        program: program holding the parameters. Defaults to
            framework.default_main_program().

    Raises:
        TypeError: if `param_list` does not resolve to Parameter objects only.
    """
    program = (framework.default_main_program()
               if program is None else program)
    param_list = (program.block(0).all_parameters()
                  if param_list is None else param_list)

    # A list made only of names is resolved to the variables in block 0.
    if all(isinstance(entry, basestring) for entry in param_list):
        param_list = [program.block(0).var(entry) for entry in param_list]

    if any(not isinstance(entry, framework.Parameter)
           for entry in param_list):
        raise TypeError(
            "'param_list' should be a list of Parameter or basestring(parameter's name)."
        )

    for parameter in param_list:
        parameter.gradient_clip_attr = GradientClipByGlobalNorm(clip_norm,
                                                                group_name)
def append_gradient_clip_ops(param_grad):
context = dict()
create_op_callbacks = []
for p, g in param_grad:
clip_attr = getattr(p, 'clip_attr', NullGradientClipAttr())
clip_attr = getattr(p, 'gradient_clip_attr', NullGradientClipAttr())
if clip_attr is None:
clip_attr = NullGradientClipAttr()
if not isinstance(clip_attr, BaseGradientClipAttr):
raise TypeError(
"clip attribute should be an instance of BaseGradientClippingAttr"
)
"clip attribute should be an instance of BaseGradientClipAttr")
clip_attr.process_context(context=context, p_g=param_grad)
clip_attr.process_context(context=context, param=p, grad=g)
create_op_callbacks.append(
functools.partial(
clip_attr.create_operators, param=p, grad=g))

@ -780,7 +780,7 @@ class Block(object):
trainable=p.trainable,
optimize_attr=p.optimize_attr,
regularizer=p.regularizer,
clip_attr=p.clip_attr,
gradient_clip_attr=p.gradient_clip_attr,
error_clip=p.error_clip,
name=v.name)
self.vars[new_p.name] = new_p
@ -948,7 +948,7 @@ class Parameter(Variable):
self.regularizer = kwargs.get('regularizer', None)
self.clip_attr = kwargs.get('clip_attr', None)
self.gradient_clip_attr = kwargs.get('gradient_clip_attr', None)
# program is a global instance.

@ -24,6 +24,8 @@ import control_flow
from control_flow import *
import device
from device import *
import math_op_patch
from math_op_patch import *
__all__ = []
__all__ += nn.__all__
@ -32,3 +34,4 @@ __all__ += tensor.__all__
__all__ += control_flow.__all__
__all__ += ops.__all__
__all__ += device.__all__
__all__ += math_op_patch.__all__

@ -11,22 +11,41 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
from ..layer_helper import LayerHelper, unique_name
from ..framework import Program, Variable, Operator
from .. import core
from layer_function_generator import autodoc
from tensor import assign, fill_constant
import contextlib
from ..registry import autodoc
from .. import core
from ..framework import Program, Variable, Operator
from ..layer_helper import LayerHelper, unique_name
__all__ = [
'split_lod_tensor', 'merge_lod_tensor', 'BlockGuard',
'BlockGuardWithCompletion', 'StaticRNNMemoryLink', 'WhileGuard', 'While',
'lod_rank_table', 'max_sequence_len', 'topk', 'lod_tensor_to_array',
'array_to_lod_tensor', 'increment', 'array_write', 'create_array',
'less_than', 'array_read', 'shrink_memory', 'array_length', 'IfElse',
'DynamicRNN', 'ConditionalBlock', 'StaticRNN', 'reorder_lod_tensor_by_rank',
'ParallelDo', 'Print'
'split_lod_tensor',
'merge_lod_tensor',
'BlockGuard',
'BlockGuardWithCompletion',
'StaticRNNMemoryLink',
'WhileGuard',
'While',
'lod_rank_table',
'max_sequence_len',
'topk',
'lod_tensor_to_array',
'array_to_lod_tensor',
'increment',
'array_write',
'create_array',
'less_than',
'array_read',
'shrink_memory',
'array_length',
'IfElse',
'DynamicRNN',
'ConditionalBlock',
'StaticRNN',
'reorder_lod_tensor_by_rank',
'ParallelDo',
'Print',
]
@ -1458,7 +1477,7 @@ class DynamicRNN(object):
method))
@autodoc
@autodoc()
def reorder_lod_tensor_by_rank(x, rank_table):
helper = LayerHelper('reorder_lod_tensor_by_rank', **locals())
helper.is_instance('x', Variable)

@ -15,14 +15,14 @@
All util layers.
"""
from ..layer_helper import LayerHelper
from layer_function_generator import autodoc
from ..framework import unique_name
from ..registry import autodoc
from ..layer_helper import LayerHelper
__all__ = ['get_places']
@autodoc
@autodoc()
def get_places(device_count=None, device_type=None):
helper = LayerHelper('get_places', **locals())
out_places = helper.create_variable(name=unique_name(helper.name + ".out"))

@ -13,17 +13,19 @@
# limitations under the License.
import re
import cStringIO
import warnings
import functools
import inspect
import warnings
from .. import proto
import proto.framework_pb2 as framework_pb2
from framework import OpProtoHolder, Variable, Program, Operator
from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
framework_pb2 = proto.framework_pb2
from ..framework import OpProtoHolder, Variable
from ..layer_helper import LayerHelper
__all__ = [
'deprecated',
'register_layer',
'generate_layer_fn',
'autodoc',
]
@ -96,7 +98,7 @@ def _generate_doc_string_(op_proto):
return buf.getvalue()
def register_layer(op_type):
def generate_layer_fn(op_type):
"""Register the Python layer for an Operator.
Args:
@ -207,7 +209,10 @@ def deprecated(func_or_class):
return func_wrapper
def autodoc(func):
func.__doc__ = _generate_doc_string_(OpProtoHolder.instance().get_op_proto(
func.__name__))
def autodoc(comment=""):
    """Build a decorator that fills in a layer function's docstring.

    The docstring is generated from the operator proto registered under the
    decorated function's name, with `comment` appended to it.

    Args:
        comment(str): extra text appended after the generated documentation.

    Returns:
        A decorator that sets `__doc__` and returns the function unchanged.
    """

    def __impl__(func):
        op_proto = OpProtoHolder.instance().get_op_proto(func.__name__)
        func.__doc__ = _generate_doc_string_(op_proto) + comment
        return func

    return __impl__

@ -0,0 +1,152 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..framework import Variable, unique_name
from ..registry import OpProtoHolder
__all__ = ['monkey_patch_variable']
def monkey_patch_variable():
    """Install arithmetic operators and `astype` on `Variable`.

    After this call, `Variable` supports `+`, `-`, `*` and `/` (including the
    reflected forms) against other Variables or Python numbers. Each operation
    appends the matching elementwise op to the variable's block. Numbers are
    first materialised as constant tensors.
    """

    def unique_tmp_name():
        return unique_name("tmp")

    def safe_get_dtype(var):
        """Return `var.dtype`, raising ValueError if it cannot be read."""
        try:
            dtype = var.dtype
        except Exception:
            # Format the message explicitly. Passing var.name as a second
            # argument to ValueError leaves the "%s" placeholder unformatted.
            raise ValueError("Cannot get data type from %s" % var.name)
        return dtype

    def create_tensor(block, value, dtype, shape):
        """Append a fill_constant op producing a `shape` tensor of `value`."""
        value = float(value)
        tmp_name = unique_tmp_name()
        var = block.create_var(name=tmp_name, shape=shape, dtype=dtype)
        block.append_op(
            type="fill_constant",
            outputs={'Out': [var]},
            attrs={'dtype': var.dtype,
                   'shape': shape,
                   'value': value})
        return var

    def create_scalar(block, value, dtype):
        """Append a fill_constant op producing a one-element tensor."""
        return create_tensor(block, value, dtype, shape=[1])

    def create_tensor_with_batchsize(ref_var, value, dtype):
        """Create a constant tensor shaped like `ref_var`.

        Used when `ref_var` has a negative (unknown) dimension.
        """
        assert isinstance(ref_var, Variable)
        value = float(value)
        tmp_name = unique_tmp_name()
        var = ref_var.block.create_var(name=tmp_name, dtype=dtype)
        # NOTE(review): unlike create_tensor, no 'dtype' attribute is passed
        # to the op here -- confirm the op's default matches `dtype`.
        ref_var.block.append_op(
            type='fill_constant_batch_size_like',
            outputs={'Out': [var]},
            inputs={'Input': [ref_var]},
            attrs={'shape': ref_var.shape,
                   'value': value})
        return var

    def astype(self, dtype):
        """
        Cast a variable to a specified data type.
        NOTE: The variable must be a Tensor
        Args:
            self(Variable): The source variable
            dtype: The target dtype
        Returns:
            Variable with new dtype
        """
        tmp_name = unique_tmp_name()
        out = self.block.create_var(name=tmp_name, dtype=dtype)
        self.block.append_op(
            type="cast",
            inputs={"X": [self]},
            outputs={"Out": [out]},
            attrs={"in_dtype": self.dtype,
                   "out_dtype": out.dtype})
        return out

    def _elemwise_method_creator_(method_name, op_type, reverse=False):
        """Build the method `method_name` that appends an `op_type` op.

        With `reverse=True` the operands are swapped before the op is
        appended, which is needed for non-commutative reflected operators.
        """

        def __impl__(self, other_var):
            lhs_dtype = safe_get_dtype(self)

            if not isinstance(other_var, Variable):
                if reverse:
                    # The number becomes the left operand, so it is expanded
                    # to the full shape of `self`.
                    has_batch_size = False
                    for elem in self.shape:
                        if elem < 0:
                            has_batch_size = True
                            break
                    if not has_batch_size:
                        other_var = create_tensor(
                            self.block,
                            other_var,
                            dtype=lhs_dtype,
                            shape=self.shape)
                    else:
                        other_var = create_tensor_with_batchsize(
                            self, other_var, lhs_dtype)
                else:
                    # add fill_op to self.block
                    other_var = create_scalar(
                        self.block, value=other_var, dtype=lhs_dtype)

            # Both operands are brought to the dtype of `self`.
            rhs_dtype = safe_get_dtype(other_var)
            if lhs_dtype != rhs_dtype:
                other_var = astype(other_var, lhs_dtype)
            if reverse:
                tmp = self
                self = other_var
                other_var = tmp

            tmp_name = unique_tmp_name()
            out = self.block.create_var(name=tmp_name, dtype=lhs_dtype)
            self.block.append_op(
                type=op_type,
                inputs={'X': [self],
                        'Y': [other_var]},
                outputs={'Out': out})
            return out

        comment = OpProtoHolder.instance().get_op_proto(op_type).comment

        __impl__.__doc__ = """
        {0}
        Args:
            self(Variable): left hand variable
            other_var(Variable|float|int): right hand variable
        Returns:
            Variable
        """.format(comment)
        __impl__.__name__ = method_name
        return __impl__

    # inject methods
    for method_name, op_type, reverse in (
        ("__add__", "elementwise_add", False),
            # a+b == b+a. Do not need to reverse explicitly
        ("__radd__", "elementwise_add", False),
        ("__sub__", "elementwise_sub", False),
        ("__rsub__", "elementwise_sub", True),
        ("__mul__", "elementwise_mul", False),
            # a*b == b*a. Do not need to reverse explicitly
        ("__rmul__", "elementwise_mul", False),
        ("__div__", "elementwise_div", False),
        ("__rdiv__", "elementwise_div", True)):
        setattr(Variable, method_name,
                _elemwise_method_creator_(method_name, op_type, reverse))

    Variable.astype = astype

@ -11,8 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..registry import register_layer
from layer_function_generator import generate_layer_fn
__activations__ = [
'sigmoid',
@ -46,21 +45,11 @@ __activations__ = [
]
__all__ = [
'mean',
'mul',
'reshape',
'scale',
'transpose',
'sigmoid_cross_entropy_with_logits',
'elementwise_add',
'elementwise_div',
'elementwise_sub',
'elementwise_mul',
'elementwise_max',
'elementwise_min',
'clip',
'sequence_softmax',
'mean', 'mul', 'reshape', 'scale', 'transpose',
'sigmoid_cross_entropy_with_logits', 'elementwise_add', 'elementwise_div',
'elementwise_sub', 'elementwise_mul', 'elementwise_max', 'elementwise_min',
'clip', 'clip_by_norm', 'sequence_softmax'
] + __activations__
for _OP in set(__all__):
globals()[_OP] = register_layer(_OP)
globals()[_OP] = generate_layer_fn(_OP)

@ -25,13 +25,13 @@ class ParamAttr(object):
learning_rate=1.0,
regularizer=None,
trainable=True,
clip=None):
gradient_clip=None):
self.name = name
self.initializer = initializer
self.learning_rate = learning_rate
self.regularizer = regularizer
self.trainable = trainable
self.clip = clip
self.gradient_clip = gradient_clip
def set_default_initializer(self, initializer):
if initializer is None:
@ -77,7 +77,7 @@ class ParamAttr(object):
},
'regularizer': self.regularizer,
'trainable': self.trainable,
'clip_attr': self.clip
'gradient_clip_attr': self.gradient_clip
}
if with_initializer:
kwargs['initializer'] = self.initializer

@ -6,3 +6,4 @@ endforeach()
add_subdirectory(book)
add_subdirectory(book_distribute)
add_subdirectory(book_memory_optimization)

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save