Merge branch 'develop' into stride

release/0.10.0
Luo Tao 8 years ago
commit c1738e29c8

@ -64,6 +64,7 @@ include(external/python) # download, build, install python
include(external/openblas) # download, build, install openblas
include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any) # download linb::any
include(package) # set paddle packages
include(cpplint) # set paddle c++ style

@ -0,0 +1,20 @@
INCLUDE(ExternalProject)
SET(ANY_SOURCE_DIR ${THIRD_PARTY_PATH}/any)
INCLUDE_DIRECTORIES(${ANY_SOURCE_DIR}/src/linb_any)
ExternalProject_Add(
linb_any
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/thelink2012/any.git"
GIT_TAG "8fef1e93710a0edf8d7658999e284a1142c4c020"
PREFIX ${ANY_SOURCE_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
add_definitions(-DANY_IMPL_ANY_CAST_MOVEABLE)
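Defining ANY_IMPL_ANY_CAST_MOVEABLE makes any_cast on an rvalue move the held value out instead of copying it, mirroring std::any's C++17 behavior. A minimal sketch of the effect (not part of this diff):

#include <any.hpp>  // header fetched by the linb_any external project above
#include <string>
#include <utility>

void sketch() {
  linb::any a = std::string("pad config");
  // With ANY_IMPL_ANY_CAST_MOVEABLE defined, this moves rather than copies:
  std::string s = linb::any_cast<std::string>(std::move(a));
}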

@ -1,13 +1,17 @@
import sys
import paddle.v2 as paddle
def seqToseq_net(source_dict_dim, target_dict_dim):
def seqToseq_net(source_dict_dim, target_dict_dim, is_generating=False):
### Network Architecture
word_vector_dim = 512 # dimension of word vector
decoder_size = 512 # dimension of hidden unit in GRU Decoder network
encoder_size = 512 # dimension of hidden unit in GRU Encoder network
beam_size = 3
max_length = 250
#### Encoder
src_word_id = paddle.layer.data(
name='source_language_word',
@ -67,30 +71,57 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2]
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
if not is_generating:
trg_embedding = paddle.layer.embedding(
input=paddle.layer.data(
name='target_language_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim,
param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding)
# For a decoder equipped with an attention mechanism, in training,
# the target embedding (the ground truth) is the data input,
# while the encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory
# for the recurrent_group.
decoder = paddle.layer.recurrent_group(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs)
lbl = paddle.layer.data(
name='target_language_next_word',
type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost
else:
# In generation, the decoder predicts the next target word based on
# the encoded source sequence and the last generated target word.
# The encoded source sequence (the encoder's output) must be specified
# by StaticInput, which is a read-only memory.
# The embedding of the last generated word is automatically retrieved
# by GeneratedInput, which is initialized with a start mark such as
# <s> that must be included in generation.
trg_embedding = paddle.layer.GeneratedInputV2(
size=target_dict_dim,
embedding_name='_target_language_embedding',
embedding_size=word_vector_dim)
group_inputs.append(trg_embedding)
beam_gen = paddle.layer.beam_search(
name=decoder_group_name,
step=gru_decoder_with_attention,
input=group_inputs,
bos_id=0,
eos_id=1,
beam_size=beam_size,
max_length=max_length)
return beam_gen
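For context, a hedged sketch of driving the generating branch above at inference time; paddle.infer and its field argument are assumptions based on the v2 API, not part of this diff:

# Hedged sketch: decode with the beam_gen network returned when
# is_generating=True (dictionary sizes and test_batch are made up).
gen_net = seqToseq_net(30000, 30000, is_generating=True)
beam_result = paddle.infer(
    output_layer=gen_net,
    parameters=parameters,      # trained parameters, loaded elsewhere
    input=test_batch,           # list of source-word-id sequences
    field=['prob', 'id'])       # per-beam scores and generated word ids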
def main():

@ -16,66 +16,6 @@ limitations under the License. */
namespace paddle {
template <>
size_t FuncConfig::get<size_t>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.s;
}
template <>
real FuncConfig::get<real>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.r;
}
template <>
int FuncConfig::get<int>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.i;
}
template <>
bool FuncConfig::get<bool>(const std::string& key) const {
auto it = valueMap_.find(key);
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
return it->second.b;
}
template <>
FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].s = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].r = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<int>(const std::string& key, int v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].i = v;
return *this;
}
template <>
FuncConfig& FuncConfig::set<bool>(const std::string& key, bool v) {
CHECK_EQ(static_cast<int>(valueMap_.count(key)), 0) << "Duplicated value: "
<< key;
valueMap_[key].b = v;
return *this;
}
void BufferArgs::addArg(const Matrix& arg,
const TensorShape& shape,
ArgType argType) {

@ -18,32 +18,49 @@ limitations under the License. */
#include <vector>
#include "BufferArg.h"
#include "paddle/math/Matrix.h"
#include "paddle/utils/Any.h"
#include "paddle/utils/ClassRegistrar.h"
#include "paddle/utils/Error.h"
namespace paddle {
/**
* Function Configuration.
* The argument type of Function::init.
* A follow-up change may move this data structure into the Proto definition.
*/
class FuncConfig {
public:
union value {
size_t s;
real r;
int i;
bool b;
};
template <typename T>
T get(const std::string& key) const;
T get(const std::string& key, Error* err = nullptr) const {
try {
return any_cast<T>(valueMap_.at(key));
} catch (std::exception& e) { // could be cast or out of range exception.
if (err) {
*err = Error(e.what());
} else {
LOG(FATAL) << "Cannot get key " << key << "with error " << e.what();
}
return T();
}
}
template <typename T>
FuncConfig& set(const std::string& key, T v);
FuncConfig& set(const std::string& key, T v, Error* err = nullptr) {
auto it = valueMap_.find(key);
if (it != valueMap_.end()) { // already contains key.
if (err) {
*err = Error("Key %s is already set in FuncConfig", key.c_str());
} else {
LOG(FATAL) << "Key " << key << " is already set in FuncConfig.";
}
return *this;
}
valueMap_[key] = any(v);
return *this;
}
protected:
std::map<std::string, value> valueMap_;
mutable std::unordered_map<std::string, any> valueMap_;
};
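For context, a self-contained sketch (not from this diff) of the pattern the new FuncConfig follows: a string-keyed map of type-erased values, where a missing key or a wrong type surfaces as an exception that get() turns into an Error or a LOG(FATAL):

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <any.hpp>  // linb::any; std::any behaves the same under C++17

int main() {
  std::unordered_map<std::string, linb::any> valueMap;
  valueMap["channel"] = std::vector<uint32_t>{2, 3};

  // Correct type: any_cast succeeds.
  auto channel = linb::any_cast<std::vector<uint32_t>>(valueMap.at("channel"));
  std::cout << channel[0] << ", " << channel[1] << "\n";  // prints 2, 3

  // A wrong type (or a missing key) throws; FuncConfig::get converts this
  // into a paddle::Error when an Error* is supplied.
  try {
    linb::any_cast<int>(valueMap.at("channel"));
  } catch (const std::exception& e) {
    std::cout << "caught: " << e.what() << "\n";
  }
  return 0;
}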
/**

@ -25,9 +25,9 @@ void Pad<DEVICE_TYPE_CPU>(real* outputs,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
@ -51,9 +51,9 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
const int inH,
const int inW,
const PadConf& pad) {
int cstart = pad.channelStart, cend = pad.channelEnd;
int hstart = pad.heightStart, hend = pad.heightEnd;
int wstart = pad.widthStart, wend = pad.widthEnd;
int cstart = pad.channel[0], cend = pad.channel[1];
int hstart = pad.height[0], hend = pad.height[1];
int wstart = pad.width[0], wend = pad.width[1];
int outC = inC + cstart + cend;
int outH = inH + hstart + hend;
int outW = inW + wstart + wend;
@ -71,6 +71,12 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
}
}
static inline PadConf castToPadConf(const FuncConfig& conf) {
return {conf.get<std::vector<uint32_t>>("channel"),
conf.get<std::vector<uint32_t>>("height"),
conf.get<std::vector<uint32_t>>("width")};
}
/**
* \brief Padding zeros to input according to the specified dimensions.
* The struct pad_ contains the padding size in each dimension.
@ -127,14 +133,7 @@ void PadGrad<DEVICE_TYPE_CPU>(real* inGrad,
template <DeviceType Device>
class PadFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());
@ -175,14 +174,7 @@ private:
template <DeviceType Device>
class PadGradFunc : public FunctionBase {
public:
void init(const FuncConfig& config) override {
pad_.channelStart = config.get<int>("cstart");
pad_.channelEnd = config.get<int>("cend");
pad_.heightStart = config.get<int>("hstart");
pad_.heightEnd = config.get<int>("hend");
pad_.widthStart = config.get<int>("wstart");
pad_.widthEnd = config.get<int>("wend");
}
void init(const FuncConfig& config) override { pad_ = castToPadConf(config); }
void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
CHECK_EQ(1UL, inputs.size());

@ -19,18 +19,12 @@ limitations under the License. */
namespace paddle {
struct PadConf {
/// how many values to add before the data along channel dimension.
int channelStart;
/// how many values to add after the data along channel dimension.
int channelEnd;
/// how many values to add before the data along height dimension.
int heightStart;
/// how many values to add after the data along height dimension.
int heightEnd;
/// how many values to add before the data along width dimension.
int widthStart;
/// how many values to add after the data along width dimension.
int widthEnd;
/// how many values to add before/after the data along the channel dimension.
std::vector<uint32_t> channel;
/// how many values to add before/after the data along the height dimension.
std::vector<uint32_t> height;
/// how many values to add before/after the data along the width dimension.
std::vector<uint32_t> width;
};
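A minimal sketch of the new before/after convention (values are made up):

// Pad 2 values before and 3 after along channels, 1/2 along height,
// and 3/1 along width; each output dimension grows by the pair's sum:
PadConf pad{{2, 3}, {1, 2}, {3, 1}};
// outC = inC + 2 + 3;  outH = inH + 1 + 2;  outW = inW + 3 + 1;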
/**

@ -36,12 +36,9 @@ bool PadLayer::init(const LayerMap& layerMap,
CHECK_EQ(2, pad_conf.pad_c_size());
CHECK_EQ(2, pad_conf.pad_h_size());
CHECK_EQ(2, pad_conf.pad_w_size());
padc_.push_back(pad_conf.pad_c(0));
padc_.push_back(pad_conf.pad_c(1));
padh_.push_back(pad_conf.pad_h(0));
padh_.push_back(pad_conf.pad_h(1));
padw_.push_back(pad_conf.pad_w(0));
padw_.push_back(pad_conf.pad_w(1));
padc_ = {pad_conf.pad_c(0), pad_conf.pad_c(1)};
padh_ = {pad_conf.pad_h(0), pad_conf.pad_h(1)};
padw_ = {pad_conf.pad_w(0), pad_conf.pad_w(1)};
outDims_ = TensorShape(4);
setOutDims(0);
@ -49,21 +46,15 @@ bool PadLayer::init(const LayerMap& layerMap,
createFunction(forward_,
"Pad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
.set("channel", padc_)
.set("height", padh_)
.set("width", padw_));
createFunction(backward_,
"PadGrad",
FuncConfig()
.set("cstart", padc_[0])
.set("cend", padc_[1])
.set("hstart", padh_[0])
.set("hend", padh_[1])
.set("wstart", padw_[0])
.set("wend", padw_[1]));
.set("channel", padc_)
.set("height", padh_)
.set("width", padw_));
return true;
}

@ -38,9 +38,9 @@ protected:
void setOutDims(const size_t batchSize);
void setTensorDim(const size_t batchSize);
std::vector<int> padc_;
std::vector<int> padh_;
std::vector<int> padw_;
std::vector<uint32_t> padc_;
std::vector<uint32_t> padh_;
std::vector<uint32_t> padw_;
TensorShape inDims_;
TensorShape outDims_;
};

@ -160,10 +160,19 @@ class SparseFloatScanner(SparseBinaryScanner):
class IndexScanner(IScanner):
def __init__(self, input_type, pos):
IScanner.__init__(self, input_type, pos)
self.__ids__ = []
self.__ids__ = None
self.__idx__ = 0
def pre_scan(self, dat):
self.__idx__ += 1
def finish_pre_scan(self, argument):
self.__ids__ = [0] * self.__idx__
self.__idx__ = 0
def scan(self, dat):
self.__ids__.append(dat)
self.__ids__[self.__idx__] = dat
self.__idx__ += 1
def finish_scan(self, argument):
ids = swig_paddle.IVector.create(self.__ids__, self.data_in_gpu)
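The scanner now runs in two passes so __ids__ can be allocated once instead of grown by append. A hedged sketch of the driver protocol (the loop itself is illustrative, not from this diff):

scanner = IndexScanner(input_type, pos)
for dat in batch:                  # pass 1: count entries
    scanner.pre_scan(dat)
scanner.finish_pre_scan(argument)  # preallocate __ids__, reset the index
for dat in batch:                  # pass 2: write into the preallocated list
    scanner.scan(dat)
scanner.finish_scan(argument)      # hand the ids to swig_paddle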

@ -0,0 +1,35 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#if __cplusplus > 201402L
#include <any>
namespace paddle {
// use std::any for C++17
using std::any;
using std::any_cast;
using std::bad_any_cast;
} // namespace paddle
#else
#include <any.hpp>
namespace paddle {
// use linb::any for C++11/14
using linb::any;
using linb::any_cast;
using linb::bad_any_cast;
} // namespace paddle
#endif
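Call sites then stay standard-version-agnostic; a minimal usage sketch (not part of this diff):

#include "paddle/utils/Any.h"

// paddle::any resolves to std::any under C++17 and to linb::any otherwise,
// so the same code compiles under either standard:
void sketch() {
  paddle::any v = 42;
  int i = paddle::any_cast<int>(v);  // throws paddle::bad_any_cast on a mismatch
  (void)i;
}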

@ -18,7 +18,7 @@ import inspect
from paddle.trainer.config_parser import *
from .activations import LinearActivation, SigmoidActivation, TanhActivation, \
ReluActivation, IdentityActivation, SoftmaxActivation
ReluActivation, IdentityActivation, SoftmaxActivation, BaseActivation
from .evaluators import *
from .poolings import MaxPooling, AvgPooling, BasePoolingType
from .attrs import *
@ -2277,8 +2277,9 @@ def img_pool_layer(input,
pool_type.name = 'avg'
type_name = pool_type.name + '-projection' \
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
if (
isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)) \
else pool_type.name
pool_size_y = pool_size if pool_size_y is None else pool_size_y
stride_y = stride if stride_y is None else stride_y
@ -3318,8 +3319,8 @@ def recurrent_group(step,
assert (targetInlink == None or targetInlink_in_inlinks())
targetInlinkName = None if targetInlink == None \
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
else targetInlink.name if isinstance(targetInlink, LayerOutput) \
else targetInlink.input.name
contains_sub_seq = [False]
@ -4831,12 +4832,14 @@ def crf_decoding_layer(input,
return LayerOutput(name, LayerType.CRF_DECODING_LAYER, parents, size=1)
@wrap_act_default(act=SigmoidActivation())
@wrap_bias_attr_default(has_bias=True)
@wrap_name_default()
@layer_support()
def nce_layer(input,
label,
num_classes,
act=None,
weight=None,
num_neg_samples=10,
neg_distribution=None,
@ -4865,6 +4868,8 @@ def nce_layer(input,
:type weight: LayerOutput
:param num_classes: number of classes.
:type num_classes: int
:param act: Activation type. SigmoidActivation is the default.
:type act: BaseActivation
:param num_neg_samples: number of negative samples. Default is 10.
:type num_neg_samples: int
:param neg_distribution: The distribution for generating the random negative labels.
@ -4887,6 +4892,8 @@ def nce_layer(input,
assert isinstance(neg_distribution, collections.Sequence)
assert len(neg_distribution) == num_classes
assert sum(neg_distribution) == 1
if not isinstance(act, BaseActivation):
raise TypeError("act of nce_layer should be a BaseActivation instance")
ipts_for_layer = []
parents = []
@ -4908,12 +4915,17 @@ def nce_layer(input,
type=LayerType.NCE_LAYER,
num_classes=num_classes,
neg_sampling_dist=neg_distribution,
active_type=act.name,
num_neg_samples=num_neg_samples,
inputs=ipts_for_layer,
bias=ParamAttr.to_bias(bias_attr),
**ExtraLayerAttribute.to_kwargs(layer_attr))
return LayerOutput(
name, LayerType.NCE_LAYER, parents=parents, size=l.config.size)
name,
LayerType.NCE_LAYER,
parents=parents,
size=l.config.size,
activation=act)
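A hedged usage sketch of the new act argument (the layer variables are made up):

# act defaults to SigmoidActivation() via @wrap_act_default and must be a
# BaseActivation instance; it can also be passed explicitly:
cost = nce_layer(
    input=[embedding],
    label=label,
    num_classes=5000,
    act=SigmoidActivation(),
    num_neg_samples=25)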
"""

@ -67,7 +67,16 @@ class Layer(object):
self.name = name
self.__context__ = {}
self.__parent_layers__ = parent_layers
self.__children_layers__ = [] # used for evaluator.
# some layers may have extra parent layers
self.__extra_parent__ = []
# used for evaluator.
self.__children_layers__ = []
def extra_parent(self):
return self.__extra_parent__
def append_extra_parent(self, parent):
self.__extra_parent__.append(parent)
def append_child(self, layer, parent_names):
self.__children_layers__.append((layer, parent_names))
@ -78,14 +87,20 @@ class Layer(object):
"""
self.__context__ = context
# short cut if myself is parsed before.
# STEP: short-circuit if this layer was parsed before.
if self.context_name() in context:
if self.use_context_name():
return context[self.context_name()]
else:
return context[self.name]
# parse parent before myself
# STEP: parse the extra parents that are not consumed by this layer
# but must be parsed before it.
for p in self.__extra_parent__:
p.to_proto(context=context)
# STEP: parse the parents that this layer consumes, and pass their
# results as kwargs into this layer's to_proto_impl method.
kwargs = dict()
for layer_name in self.__parent_layers__:
if not isinstance(self.__parent_layers__[layer_name],
@ -97,14 +112,13 @@ class Layer(object):
self.__parent_layers__[layer_name])
kwargs[layer_name] = v1_layer
# parse myself.
# STEP: parse this layer itself and add the result into context.
ret_val = self.to_proto_impl(**kwargs)
if self.context_name() is not None and \
self.context_name() not in context:
if self.context_name() is not None \
and self.context_name() not in context:
context[self.context_name()] = ret_val
# parse children.
# STEP: parse children that should be parsed after this layer.
for layer, pnames in self.__children_layers__:
drop = False
@ -117,6 +131,7 @@ class Layer(object):
continue
layer.to_proto(context=context)
# STEP: return v1 layer result
if self.context_name() is None:
return ret_val
elif self.use_context_name():

@ -66,13 +66,6 @@ def download(url, module_name, md5sum):
return filename
def dict_add(a_dict, ele):
if ele in a_dict:
a_dict[ele] += 1
else:
a_dict[ele] = 1
def fetch_all():
for module_name in filter(lambda x: not x.startswith("__"),
dir(paddle.v2.dataset)):

@ -18,6 +18,7 @@ TODO(yuyang18): Complete comments.
"""
import paddle.v2.dataset.common
import collections
import tarfile
import Queue
import re
@ -48,10 +49,10 @@ def tokenize(pattern):
def build_dict(pattern, cutoff):
word_freq = {}
word_freq = collections.defaultdict(int)
for doc in tokenize(pattern):
for word in doc:
paddle.v2.dataset.common.dict_add(word_freq, word)
word_freq[word] += 1
# Not sure if we should prune less-frequent words here.
word_freq = filter(lambda x: x[1] > cutoff, word_freq.items())

@ -17,6 +17,7 @@ imikolov's simple dataset: http://www.fit.vutbr.cz/~imikolov/rnnlm/
Complete comments.
"""
import paddle.v2.dataset.common
import collections
import tarfile
__all__ = ['train', 'test', 'build_dict']
@ -26,15 +27,14 @@ MD5 = '30177ea32e27c525793142b6bf2c8e2d'
def word_count(f, word_freq=None):
add = paddle.v2.dataset.common.dict_add
if word_freq == None:
word_freq = {}
if word_freq is None:
word_freq = collections.defaultdict(int)
for l in f:
for w in l.strip().split():
add(word_freq, w)
add(word_freq, '<s>')
add(word_freq, '<e>')
word_freq[w] += 1
word_freq['<s>'] += 1
word_freq['<e>'] += 1
return word_freq
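A small illustration of the defaultdict(int) idiom that replaces dict_add (standalone, not from this diff):

import collections

word_freq = collections.defaultdict(int)
for w in "the cat the".split():
    word_freq[w] += 1              # missing keys start at 0 automatically
# dict(word_freq) == {'the': 2, 'cat': 1}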

File diff suppressed because it is too large.

@ -159,7 +159,8 @@ class Parameters(object):
if not self.has_key(key):
raise ValueError("No such parameter %s" % key)
conf = self.__param_conf__[key]
return tuple(map(int, conf.dims))
dims = conf.dims if conf.dims else (1, conf.size)
return tuple(map(int, dims))
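In effect, a parameter whose config carries no explicit dims (for example some bias parameters) now reports a (1, size) shape instead of an empty tuple. A hedged illustration, assuming this hunk is Parameters.get_shape (the parameter name and size are made up):

# Assuming '___fc_layer_0__.wbias' has size=128 and empty dims in its config:
parameters.get_shape('___fc_layer_0__.wbias')   # -> (1, 128)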
def __setitem__(self, key, value):
"""

@ -59,13 +59,13 @@ class ImageLayerTest(unittest.TestCase):
num_channels=16,
pool_type=pooling.Max())
maxout = layer.maxout(input=conv, num_channels=16, groups=4)
print layer.parse_network(maxpool, spp, maxout)
print layer.parse_network([maxpool, spp, maxout])
def test_norm_layer(self):
norm1 = layer.img_cmrnorm(input=conv, size=5)
norm2 = layer.batch_norm(input=conv)
norm3 = layer.sum_to_one_norm(input=conv)
print layer.parse_network(norm1, norm2, norm3)
print layer.parse_network([norm1, norm2, norm3])
class AggregateLayerTest(unittest.TestCase):
@ -78,7 +78,8 @@ class AggregateLayerTest(unittest.TestCase):
first_seq = layer.first_seq(input=pixel)
concat = layer.concat(input=[last_seq, first_seq])
seq_concat = layer.seq_concat(a=last_seq, b=first_seq)
print layer.parse_network(pool, last_seq, first_seq, concat, seq_concat)
print layer.parse_network(
[pool, last_seq, first_seq, concat, seq_concat])
class MathLayerTest(unittest.TestCase):
@ -95,8 +96,10 @@ class MathLayerTest(unittest.TestCase):
tensor = layer.tensor(a=pixel, b=pixel, size=1000)
cos_sim = layer.cos_sim(a=pixel, b=pixel)
trans = layer.trans(input=tensor)
print layer.parse_network(addto, linear_comb, interpolation, power,
scaling, slope, tensor, cos_sim, trans)
print layer.parse_network([
addto, linear_comb, interpolation, power, scaling, slope, tensor,
cos_sim, trans
])
class ReshapeLayerTest(unittest.TestCase):
@ -110,7 +113,8 @@ class ReshapeLayerTest(unittest.TestCase):
repeat = layer.repeat(input=pixel, num_repeats=4)
reshape = layer.seq_reshape(input=pixel, reshape_size=4)
rotate = layer.rotate(input=pixel, height=16, width=49)
print layer.parse_network(block_expand, expand, repeat, reshape, rotate)
print layer.parse_network(
[block_expand, expand, repeat, reshape, rotate])
class RecurrentLayerTest(unittest.TestCase):
@ -119,7 +123,7 @@ class RecurrentLayerTest(unittest.TestCase):
recurrent = layer.recurrent(input=word)
lstm = layer.lstmemory(input=word)
gru = layer.grumemory(input=word)
print layer.parse_network(recurrent, lstm, gru)
print layer.parse_network([recurrent, lstm, gru])
class CostLayerTest(unittest.TestCase):
@ -139,10 +143,10 @@ class CostLayerTest(unittest.TestCase):
cost10 = layer.sum_cost(input=inference)
cost11 = layer.huber_cost(input=score, label=label)
print layer.parse_network(cost1, cost2)
print layer.parse_network(cost3, cost4)
print layer.parse_network(cost5, cost6)
print layer.parse_network(cost7, cost8, cost9, cost10, cost11)
print layer.parse_network([cost1, cost2])
print layer.parse_network([cost3, cost4])
print layer.parse_network([cost5, cost6])
print layer.parse_network([cost7, cost8, cost9, cost10, cost11])
crf = layer.crf(input=inference, label=label)
crf_decoding = layer.crf_decoding(input=inference, size=3)
@ -151,8 +155,8 @@ class CostLayerTest(unittest.TestCase):
nce = layer.nce(input=inference, label=label, num_classes=3)
hsigmoid = layer.hsigmoid(input=inference, label=label, num_classes=3)
print layer.parse_network(crf, crf_decoding, ctc, warp_ctc, nce,
hsigmoid)
print layer.parse_network(
[crf, crf_decoding, ctc, warp_ctc, nce, hsigmoid])
class OtherLayerTest(unittest.TestCase):
@ -160,7 +164,7 @@ class OtherLayerTest(unittest.TestCase):
maxid = layer.max_id(input=inference)
sampling_id = layer.sampling_id(input=inference)
eos = layer.eos(input=maxid, eos_id=5)
print layer.parse_network(maxid, sampling_id, eos)
print layer.parse_network([maxid, sampling_id, eos])
def test_slicing_joining_layer(self):
pad = layer.pad(input=conv, pad_c=[2, 3], pad_h=[1, 2], pad_w=[3, 1])

@ -17,7 +17,6 @@ import collections
from paddle.proto.ModelConfig_pb2 import ModelConfig
import layer as v2_layer
from layer import WithExtraParent
__all__ = ['Topology']
@ -41,9 +40,8 @@ def __bfs_travel__(callback, *layers):
__break__ = callback(each_layer)
if __break__:
return
__layers__ = each_layer.__parent_layers__.values()
if isinstance(each_layer, WithExtraParent):
__layers__ = __layers__ + each_layer.extra_parent()
__layers__ = each_layer.__parent_layers__.values() + \
each_layer.extra_parent()
__bfs_travel__(callback, *__layers__)
@ -53,14 +51,26 @@ class Topology(object):
and network configs.
"""
def __init__(self, layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
def __init__(self, layers, extra_layers=None):
def __check__(layers):
if not isinstance(layers, collections.Sequence):
__check_layer_type__(layers)
layers = [layers]
for layer in layers:
__check_layer_type__(layer)
return layers
layers = __check__(layers)
self.layers = layers
self.__model_config__ = v2_layer.parse_network(*layers)
if extra_layers is not None:
extra_layers = __check__(extra_layers)
self.__model_config__ = v2_layer.parse_network(
layers, extra_layers=extra_layers)
if extra_layers is not None:
self.layers.extend(extra_layers)
assert isinstance(self.__model_config__, ModelConfig)
def proto(self):

@ -37,9 +37,12 @@ class SGD(object):
:type cost: paddle.v2.config_base.Layer
:param parameters: The parameters dictionary.
:type parameters: paddle.v2.parameters.Parameters
:param extra_layers: Layers in the neural network graph that are not
on the path of the cost layer but still need to be parsed.
:type extra_layers: paddle.v2.config_base.Layer
"""
def __init__(self, cost, parameters, update_equation):
def __init__(self, cost, parameters, update_equation, extra_layers=None):
if not isinstance(parameters, v2_parameters.Parameters):
raise TypeError('parameters should be a paddle.v2.parameters.Parameters instance')
@ -47,7 +50,7 @@ class SGD(object):
if not isinstance(update_equation, v2_optimizer.Optimizer):
raise TypeError("update equation parameter must be "
"paddle.v2.optimizer.Optimizer")
topology = Topology(cost)
topology = Topology(cost, extra_layers=extra_layers)
self.__optimizer__ = update_equation
self.__topology__ = topology
self.__parameters__ = parameters
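A hedged usage sketch of the new extra_layers argument (layer names are illustrative):

# Layers that evaluators need but that are unreachable from the cost layer
# can now be handed to SGD so they are parsed into the same topology:
trainer = paddle.trainer.SGD(
    cost=cost,
    parameters=parameters,
    update_equation=optimizer,
    extra_layers=[generated_seq])  # e.g. a beam-search output layer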
