Resolve conflict

avx_docs
Yi Wang 8 years ago
commit f9f2741fd0

@ -8,10 +8,13 @@ os:
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
@ -39,18 +42,23 @@ addons:
- lcov
- graphviz
- swig
- clang-format-3.8
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
if [ $? -eq 0 ]; then # if git diff return no zero, then rerun unit test.
if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script:
- paddle/scripts/travis/main.sh
notifications:

@ -1,10 +1,9 @@
# External dependency to Google protobuf.
http_archive(
name = "protobuf",
url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix = "protobuf-3.1.0",
)
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix="protobuf-3.1.0", )
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.

@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128
test 4 2 512 256
test 4 2 512 512

@ -10,4 +10,4 @@ Then you can run the command below. The flag -d specifies the training data (cif
$python gan_trainer.py -d cifar --use_gpu 1
The generated images will be stored in ./cifar_samples/
The corresponding models will be stored in ./cifar_params/
The corresponding models will be stored in ./cifar_params/

@ -15,4 +15,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz

@ -15,5 +15,3 @@ do
gunzip ${fname}.gz
fi
done

@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@ -38,8 +37,8 @@ sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
learning_method=AdamOptimizer(beta1=0.5))
def discriminator(sample):
"""
@ -50,70 +49,87 @@ def discriminator(sample):
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn, name="dis_prob", size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(
input=sample,
name="dis_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(
input=hidden,
name="dis_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(
is_static=is_generator_training, initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(
input=hidden_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0)
hidden = fc_layer(input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(
input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(
input=hidden,
name="gen_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(
is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(
input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
@ -46,11 +45,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
define_py_data_sources2(
train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1
settings(
@ -58,10 +58,9 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
gradient_clipping_threshold=25)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128)
@ -73,17 +72,15 @@ for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.1))
hidden_state = simple_lstm(
input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
output = fc_layer(
input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)

@ -33,7 +33,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_n1 = sentence_list[verb_index - 1]
else:
ctx_n1 = 'bos'
if verb_index > 1:
mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2]
@ -48,7 +48,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_p1 = sentence_list[verb_index + 1]
else:
ctx_p1 = 'eos'
if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2]
@ -69,7 +69,6 @@ def extract_dict_features(pair_file, feature_file):
feature_out.write(feature_str + '\n')
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'

@ -66,8 +66,8 @@ def transform_labels(sentences, labels):
else:
verb_list = []
for x in labels[i][0]:
if x !='-':
verb_list.append(x)
if x != '-':
verb_list.append(x)
for j in xrange(1, len(labels[i])):
label_list = labels[i][j]
@ -93,7 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True
else:
print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq))
sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair
@ -103,7 +103,7 @@ def write_file(sen_lab_pair, output_file):
sentence = x[0]
label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n')
fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__':

@ -21,7 +21,7 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict
settings.label_dict = label_dict
settings.predicate_dict = predicate_dict
#all inputs are integral and sequential type
settings.slots = [
integer_value_sequence(len(word_dict)),
@ -29,25 +29,28 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(2),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
def get_batch_size(yeild_data):
return len(yeild_data[0])
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
@provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=False,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]

@ -20,7 +20,7 @@ from paddle.trainer_config_helpers import *
#file paths
word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt'
predicate_file= './data/verbDict.txt'
predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list'
test_list_file = './data/test.list'
@ -47,7 +47,6 @@ if not is_predict:
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
@ -57,9 +56,11 @@ if not is_predict:
test_list=test_list_file,
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict })
args={
'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
@ -77,24 +78,16 @@ mark_dim = 5
hidden_dim = 512
depth = 8
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
model_average=ModelAverage(average_window=0.5,
max_average_window=10000),
)
model_average=ModelAverage(
average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
@ -108,22 +101,28 @@ ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
default_std=1/math.sqrt(hidden_dim)/3.0
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(
size=word_dim,
input=predicate,
param_attr=ParameterAttribute(
name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
@ -131,84 +130,89 @@ hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(
name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = mixed_layer(name='hidden'+str(i),
size=hidden_dim,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
]
)
lstm = lstmemory(name='lstm'+str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2)==1),
bias_attr=std_0,
param_attr=lstm_para_attr)
mix_hidden = mixed_layer(
name='hidden' + str(i),
size=hidden_dim,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = lstmemory(
name='lstm' + str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output',
size=label_dict_len,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
],
)
feature_out = mixed_layer(
name='output',
size=label_dict_len,
bias_attr=std_default,
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict:
crf_l = crf_layer( name = 'crf',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
)
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
label = target,
param_attr=ParameterAttribute(name='crfw')
)
crf_l = crf_layer(
name='crf',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(
name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
size = label_dict_len,
input = feature_out,
param_attr=ParameterAttribute(name='crfw')
)
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)

@ -26,7 +26,8 @@ UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@ -35,7 +36,7 @@ class Prediction():
self.dict = {}
self.labels = {}
self.predicate_dict={}
self.predicate_dict = {}
self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file)
@ -44,25 +45,18 @@ class Prediction():
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True')
train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(2)
]
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred), integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file):
@ -78,6 +72,7 @@ class Prediction():
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
@ -88,9 +83,10 @@ class Prediction():
).split('\t')
words = sentence.split()
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
@ -99,7 +95,7 @@ class Prediction():
marks = mark.split()
mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
@ -123,8 +119,9 @@ class Prediction():
def option_parser():
usage = ("python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
usage = (
"python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
"-c",
@ -187,8 +184,9 @@ def main():
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
predict.predict(data_file,output_file)
predict = Prediction(train_conf, dict_file, model_path, label_file,
predict_dict_file)
predict.predict(data_file, output_file)
if __name__ == '__main__':

@ -71,9 +71,7 @@ class SentimentPrediction():
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [
self.word_dict[w] for w in words if w in self.word_dict
]
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
@ -85,8 +83,8 @@ class SentimentPrediction():
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" %
(self.label[lab[0]]))
print("predicting label is %s" % (self.label[lab[0]]))
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
@ -143,9 +141,10 @@ def main():
batch.append([predict.get_index(line)])
if len(batch) == batch_size:
predict.batch_predict(batch)
batch=[]
batch = []
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__':
main()

@ -1,4 +1,4 @@
.. _api_pydataprovider:
.. _api_pydataprovider2_en:
PyDataProvider2
===============
@ -104,6 +104,8 @@ And PaddlePadle will do all of the rest things\:
Is this cool?
.. _api_pydataprovider2_en_sequential_model:
DataProvider for the sequential model
-------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several

@ -23,7 +23,7 @@ python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments, for more about command line arguments
see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
see :ref:`cmd_detail_introduction_en` .
* Parse the configuration file that is used in training with :code:`parse_config()`.
Because data to predict with always have no label, and output of prediction work
normally is the output layer rather than the cost layer, so you should modify
@ -36,7 +36,7 @@ python's :code:`help()` function. Let's walk through the above python script:
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderConverter that can accept the same input data with
PyDataProvider2, for more information please refer to document
of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
of :ref:`api_pydataprovider2_en` .
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.

@ -1,3 +1,5 @@
.. _api_trainer_config_helpers_layers:
======
Layers
======

@ -99,11 +99,3 @@ In PaddlePaddle, training is just to get a collection of model parameters, which
Although starts from a random guess, you can see that value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical with real answer.
There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data.
5. Where to Go from Here
-------------------------
- `Install and Build <../build_and_install/index.html>`_
- `Tutorials <../demo/quick_start/index_en.html>`_
- `Example and Demo <../demo/index.html>`_

@ -14,6 +14,13 @@ cd paddle
git submodule update --init --recursive
```
If you already have a local PaddlePaddle repo and have not initialized the submodule, your local submodule folder will be empty. You can simply run the last line of the above codes in your PaddlePaddle home directory to initialize your submodule folder.
If you have already initialized your submodule and you would like to sync with the upstream submodule repo, you can run the following command
```
git submodule update --remote
```
## <span id="requirements">Requirements</span>
To compile the source code, your computer must be equipped with the following dependencies.

@ -122,9 +122,9 @@ The general development workflow with Docker and Bazel is as follows:
git clone --recursive https://github.com/paddlepaddle/paddle
2. Build a development Docker image `paddle:dev` from the source code.
This image contains all the development tools and dependencies of
PaddlePaddle.
2. Build a development Docker image :code:`paddle:dev` from the source
code. This image contains all the development tools and
dependencies of PaddlePaddle.
.. code-block:: bash
@ -139,14 +139,22 @@ The general development workflow with Docker and Bazel is as follows:
.. code-block:: bash
docker run \
-d # run the container in background mode \
--name paddle # we can run a nginx container to serve documents \
-p 2022:22 # so we can SSH into this container \
-v $PWD:/paddle # mount the source code \
-v $HOME/.cache/bazel:/root/.cache/bazel # mount Bazel cache \
docker run \
-d \
--name paddle \
-p 2022:22 \
-v $PWD:/paddle \
-v $HOME/.cache/bazel:/root/.cache/bazel \
paddle:dev
where :code:`-d` makes the container running in background,
:code:`--name paddle` allows us to run a nginx container to serve
documents in this container, :code:`-p 2022:22` allows us to SSH
into this container, :code:`-v $PWD:/paddle` shares the source code
on the host with the container, :code:`-v
$HOME/.cache/bazel:/root/.cache/bazel` shares Bazel cache on the
host with the container.
4. SSH into the container:
.. code-block:: bash

@ -1,3 +1,7 @@
```eval_rst
.. _cmd_detail_introduction_en:
```
# Detail Description
## Common

@ -30,7 +30,7 @@ Then at the :code:`process` function, each :code:`yield` function will return th
yield src_ids, trg_ids, trg_ids_next
For more details description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
For more details description of how to write a data provider, please refer to :ref:`api_pydataprovider2_en` . The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
===============================================
Configure Recurrent Neural Network Architecture
@ -106,7 +106,7 @@ We will use the sequence to sequence model with attention as an example to demon
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural networks. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` is called *encoder vector* The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to :ref:`api_trainer_config_helpers_layers` for more details.
We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space:
@ -246,6 +246,6 @@ The code is listed below:
outputs(beam_gen)
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details.
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling_en` for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`.

@ -51,7 +51,7 @@ In this tutorial, we will focus on nvprof and nvvp.
:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
above profilers.
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:linenos:
@ -77,7 +77,7 @@ As a simple example, consider the following:
1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 8-10,13
@ -124,7 +124,7 @@ To use this command line profiler **nvprof**, you can simply issue the following
1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 6-7

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save