Merge pull request #829 from reyoung/feature/travis_pre_commit_checks

Feature/travis pre commit checks
avx_docs
gangliao 8 years ago committed by GitHub
commit 1adc6a288e

@ -8,10 +8,13 @@ os:
env: env:
- JOB=DOCS - JOB=DOCS
- JOB=BUILD_AND_TEST - JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix: matrix:
exclude: exclude:
- os: osx - os: osx
env: JOB=DOCS # Only generate documentation in linux env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons: addons:
apt: apt:
@ -39,6 +42,7 @@ addons:
- lcov - lcov
- graphviz - graphviz
- swig - swig
- clang-format-3.8
before_install: before_install:
- | - |
if [ ${JOB} == "BUILD_AND_TEST" ]; then if [ ${JOB} == "BUILD_AND_TEST" ]; then
@ -53,7 +57,8 @@ before_install:
fi fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi - if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script: script:
- paddle/scripts/travis/main.sh - paddle/scripts/travis/main.sh
notifications: notifications:

@ -1,17 +1,15 @@
# External dependency to Google protobuf. # External dependency to Google protobuf.
http_archive( http_archive(
name = "protobuf", name="protobuf",
url = "http://github.com/google/protobuf/archive/v3.1.0.tar.gz", url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256 = "0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7", sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix = "protobuf-3.1.0", strip_prefix="protobuf-3.1.0", )
)
# External dependency to gtest 1.7.0. This method comes from # External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html. # https://www.bazel.io/versions/master/docs/tutorial/cpp.html.
new_http_archive( new_http_archive(
name = "gtest", name="gtest",
url = "https://github.com/google/googletest/archive/release-1.7.0.zip", url="https://github.com/google/googletest/archive/release-1.7.0.zip",
sha256 = "b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0", sha256="b58cb7547a28b2c718d1e38aee18a3659c9e3ff52440297e965f5edffe34b6d0",
build_file = "third_party/gtest.BUILD", build_file="third_party/gtest.BUILD",
strip_prefix = "googletest-release-1.7.0", strip_prefix="googletest-release-1.7.0", )
)

@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128 test 4 2 512 128
test 4 2 512 256 test 4 2 512 256
test 4 2 512 512 test 4 2 512 512

@ -10,4 +10,4 @@ Then you can run the command below. The flag -d specifies the training data (cif
$python gan_trainer.py -d cifar --use_gpu 1 $python gan_trainer.py -d cifar --use_gpu 1
The generated images will be stored in ./cifar_samples/ The generated images will be stored in ./cifar_samples/
The corresponding models will be stored in ./cifar_params/ The corresponding models will be stored in ./cifar_params/

@ -15,4 +15,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz rm cifar-10-python.tar.gz

@ -15,5 +15,3 @@ do
gunzip ${fname}.gz gunzip ${fname}.gz
fi fi
done done

@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import * from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator") mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator", assert mode in set([
"discriminator", "generator", "discriminator", "generator_training", "discriminator_training"
"generator_training", ])
"discriminator_training"])
is_generator_training = mode == "generator_training" is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training" is_discriminator_training = mode == "discriminator_training"
@ -38,8 +37,8 @@ sample_dim = 2
settings( settings(
batch_size=128, batch_size=128,
learning_rate=1e-4, learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5) learning_method=AdamOptimizer(beta1=0.5))
)
def discriminator(sample): def discriminator(sample):
""" """
@ -50,70 +49,87 @@ def discriminator(sample):
of the sample is from real data. of the sample is from real data.
""" """
param_attr = ParamAttr(is_static=is_generator_training) param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training, bias_attr = ParamAttr(
initial_mean=1.0, is_static=is_generator_training, initial_mean=1.0, initial_std=0)
initial_std=0)
hidden = fc_layer(
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim, input=sample,
bias_attr=bias_attr, name="dis_hidden",
param_attr=param_attr, size=hidden_dim,
act=ReluActivation()) bias_attr=bias_attr,
param_attr=param_attr,
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim, act=ReluActivation())
bias_attr=bias_attr,
param_attr=param_attr, hidden2 = fc_layer(
act=LinearActivation()) input=hidden,
name="dis_hidden2",
hidden_bn = batch_norm_layer(hidden2, size=hidden_dim,
act=ReluActivation(), bias_attr=bias_attr,
name="dis_hidden_bn", param_attr=param_attr,
bias_attr=bias_attr, act=LinearActivation())
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0, hidden_bn = batch_norm_layer(
initial_std=0.02), hidden2,
use_global_stats=False) act=ReluActivation(),
name="dis_hidden_bn",
return fc_layer(input=hidden_bn, name="dis_prob", size=2, bias_attr=bias_attr,
bias_attr=bias_attr, param_attr=ParamAttr(
param_attr=param_attr, is_static=is_generator_training, initial_mean=1.0,
act=SoftmaxActivation()) initial_std=0.02),
use_global_stats=False)
return fc_layer(
input=hidden_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise): def generator(noise):
""" """
generator generates a sample given noise generator generates a sample given noise
""" """
param_attr = ParamAttr(is_static=is_discriminator_training) param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training, bias_attr = ParamAttr(
initial_mean=1.0, is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
initial_std=0)
hidden = fc_layer(
hidden = fc_layer(input=noise, input=noise,
name="gen_layer_hidden", name="gen_layer_hidden",
size=hidden_dim, size=hidden_dim,
bias_attr=bias_attr, bias_attr=bias_attr,
param_attr=param_attr, param_attr=param_attr,
act=ReluActivation()) act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim, hidden2 = fc_layer(
bias_attr=bias_attr, input=hidden,
param_attr=param_attr, name="gen_hidden2",
act=LinearActivation()) size=hidden_dim,
bias_attr=bias_attr,
hidden_bn = batch_norm_layer(hidden2, param_attr=param_attr,
act=ReluActivation(), act=LinearActivation())
name="gen_layer_hidden_bn",
bias_attr=bias_attr, hidden_bn = batch_norm_layer(
param_attr=ParamAttr(is_static=is_discriminator_training, hidden2,
initial_mean=1.0, act=ReluActivation(),
initial_std=0.02), name="gen_layer_hidden_bn",
use_global_stats=False) bias_attr=bias_attr,
param_attr=ParamAttr(
return fc_layer(input=hidden_bn, is_static=is_discriminator_training,
name="gen_layer1", initial_mean=1.0,
size=sample_dim, initial_std=0.02),
bias_attr=bias_attr, use_global_stats=False)
param_attr=param_attr,
act=LinearActivation()) return fc_layer(
input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training: if is_generator_training:
noise = data_layer(name="noise", size=noise_dim) noise = data_layer(name="noise", size=noise_dim)
@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1) label = data_layer(name="label", size=1)
prob = discriminator(sample) prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label) cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error') classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost) outputs(cost)
if is_generator: if is_generator:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
This configuration is a demonstration of how to implement the stacked LSTM This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states with residual connections, i.e. an LSTM layer takes the sum of the hidden states
@ -46,11 +45,12 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list' tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict' process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn, define_py_data_sources2(
test_list=tst, train_list=trn,
module="dataprovider_emb", test_list=tst,
obj=process, module="dataprovider_emb",
args={"dictionary": word_dict}) obj=process,
args={"dictionary": word_dict})
batch_size = 128 if not is_predict else 1 batch_size = 128 if not is_predict else 1
settings( settings(
@ -58,10 +58,9 @@ settings(
learning_rate=2e-3, learning_rate=2e-3,
learning_method=AdamOptimizer(), learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25 gradient_clipping_threshold=25)
)
bias_attr = ParamAttr(initial_std=0.,l2_rate=0.) bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
data = data_layer(name="word", size=len(word_dict)) data = data_layer(name="word", size=len(word_dict))
emb = embedding_layer(input=data, size=128) emb = embedding_layer(input=data, size=128)
@ -73,17 +72,15 @@ for i in range(3):
# The input to the current layer is the sum of the hidden state # The input to the current layer is the sum of the hidden state
# and input of the previous layer. # and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state]) current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128, hidden_state = simple_lstm(
lstm_cell_attr=ExtraAttr(drop_rate=0.1)) input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling()) lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2, output = fc_layer(
bias_attr=bias_attr, input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
act=SoftmaxActivation())
if is_predict: if is_predict:
maxid = maxid_layer(output) maxid = maxid_layer(output)

@ -33,7 +33,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_n1 = sentence_list[verb_index - 1] ctx_n1 = sentence_list[verb_index - 1]
else: else:
ctx_n1 = 'bos' ctx_n1 = 'bos'
if verb_index > 1: if verb_index > 1:
mark[verb_index - 2] = 1 mark[verb_index - 2] = 1
ctx_n2 = sentence_list[verb_index - 2] ctx_n2 = sentence_list[verb_index - 2]
@ -48,7 +48,7 @@ def extract_dict_features(pair_file, feature_file):
ctx_p1 = sentence_list[verb_index + 1] ctx_p1 = sentence_list[verb_index + 1]
else: else:
ctx_p1 = 'eos' ctx_p1 = 'eos'
if verb_index < len(labels_list) - 3: if verb_index < len(labels_list) - 3:
mark[verb_index + 2] = 1 mark[verb_index + 2] = 1
ctx_p2 = sentence_list[verb_index + 2] ctx_p2 = sentence_list[verb_index + 2]
@ -69,7 +69,6 @@ def extract_dict_features(pair_file, feature_file):
feature_out.write(feature_str + '\n') feature_out.write(feature_str + '\n')
if __name__ == '__main__': if __name__ == '__main__':
usage = '-p pair_file -f feature_file' usage = '-p pair_file -f feature_file'

@ -66,8 +66,8 @@ def transform_labels(sentences, labels):
else: else:
verb_list = [] verb_list = []
for x in labels[i][0]: for x in labels[i][0]:
if x !='-': if x != '-':
verb_list.append(x) verb_list.append(x)
for j in xrange(1, len(labels[i])): for j in xrange(1, len(labels[i])):
label_list = labels[i][j] label_list = labels[i][j]
@ -93,7 +93,7 @@ def transform_labels(sentences, labels):
is_in_bracket = True is_in_bracket = True
else: else:
print 'error:', ll print 'error:', ll
sen_lab_pair.append((sentences[i], verb_list[j-1], label_seq)) sen_lab_pair.append((sentences[i], verb_list[j - 1], label_seq))
return sen_lab_pair return sen_lab_pair
@ -103,7 +103,7 @@ def write_file(sen_lab_pair, output_file):
sentence = x[0] sentence = x[0]
label_seq = ' '.join(x[2]) label_seq = ' '.join(x[2])
assert len(sentence.split()) == len(x[2]) assert len(sentence.split()) == len(x[2])
fout.write(sentence + '\t' + x[1]+'\t' +label_seq + '\n') fout.write(sentence + '\t' + x[1] + '\t' + label_seq + '\n')
if __name__ == '__main__': if __name__ == '__main__':

@ -21,7 +21,7 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
settings.word_dict = word_dict settings.word_dict = word_dict
settings.label_dict = label_dict settings.label_dict = label_dict
settings.predicate_dict = predicate_dict settings.predicate_dict = predicate_dict
#all inputs are integral and sequential type #all inputs are integral and sequential type
settings.slots = [ settings.slots = [
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
@ -29,25 +29,28 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)), integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(2),
integer_value_sequence(len(label_dict)) integer_value_sequence(len(label_dict))
] ]
def get_batch_size(yeild_data): def get_batch_size(yeild_data):
return len(yeild_data[0]) return len(yeild_data[0])
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM) @provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=False,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name): def process(settings, file_name):
with open(file_name, 'r') as fdata: with open(file_name, 'r') as fdata:
for line in fdata: for line in fdata:
sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \ sentence, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark, label = \
line.strip().split('\t') line.strip().split('\t')
words = sentence.split() words = sentence.split()
sen_len = len(words) sen_len = len(words)
word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words] word_slot = [settings.word_dict.get(w, UNK_IDX) for w in words]

@ -20,7 +20,7 @@ from paddle.trainer_config_helpers import *
#file paths #file paths
word_dict_file = './data/wordDict.txt' word_dict_file = './data/wordDict.txt'
label_dict_file = './data/targetDict.txt' label_dict_file = './data/targetDict.txt'
predicate_file= './data/verbDict.txt' predicate_file = './data/verbDict.txt'
train_list_file = './data/train.list' train_list_file = './data/train.list'
test_list_file = './data/test.list' test_list_file = './data/test.list'
@ -47,7 +47,6 @@ if not is_predict:
w = line.strip() w = line.strip()
predicate_dict[w] = i predicate_dict[w] = i
if is_test: if is_test:
train_list_file = None train_list_file = None
@ -57,9 +56,11 @@ if not is_predict:
test_list=test_list_file, test_list=test_list_file,
module='dataprovider', module='dataprovider',
obj='process', obj='process',
args={'word_dict': word_dict, args={
'label_dict': label_dict, 'word_dict': word_dict,
'predicate_dict': predicate_dict }) 'label_dict': label_dict,
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict) word_dict_len = len(word_dict)
label_dict_len = len(label_dict) label_dict_len = len(label_dict)
@ -77,24 +78,16 @@ mark_dim = 5
hidden_dim = 512 hidden_dim = 512
depth = 8 depth = 8
########################### Optimizer ####################################### ########################### Optimizer #######################################
settings( settings(
batch_size=150, batch_size=150,
learning_method=MomentumOptimizer(momentum=0), learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2, learning_rate=2e-2,
regularization=L2Regularization(8e-4), regularization=L2Regularization(8e-4),
is_async=False, is_async=False,
model_average=ModelAverage(average_window=0.5, model_average=ModelAverage(
max_average_window=10000), average_window=0.5, max_average_window=10000), )
)
####################################### network ############################## ####################################### network ##############################
#8 features and 1 target #8 features and 1 target
@ -108,22 +101,28 @@ ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len) ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len) mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict: if not is_predict:
target = data_layer(name='target', size=label_dict_len) target = data_layer(name='target', size=label_dict_len)
default_std = 1 / math.sqrt(hidden_dim) / 3.0
default_std=1/math.sqrt(hidden_dim)/3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.) emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.) std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std) std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std)) predicate_embedding = embedding_layer(
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0) size=word_dim,
input=predicate,
word_input=[word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] param_attr=ParameterAttribute(
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input] name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding) emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding) emb_layers.append(mark_embedding)
@ -131,84 +130,89 @@ hidden_0 = mixed_layer(
name='hidden0', name='hidden0',
size=hidden_dim, size=hidden_dim,
bias_attr=std_default, bias_attr=std_default,
input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ]) input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3 mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0) lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr) hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
input=hidden_0, lstm_0 = lstmemory(
act=ReluActivation(), name='lstm0',
gate_act=SigmoidActivation(), input=hidden_0,
state_act=SigmoidActivation(), act=ReluActivation(),
bias_attr=std_0, gate_act=SigmoidActivation(),
param_attr=lstm_para_attr) state_act=SigmoidActivation(),
bias_attr=std_0,
param_attr=lstm_para_attr)
#stack L-LSTM and R-LSTM with direct edges #stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0] input_tmp = [hidden_0, lstm_0]
for i in range(1, depth): for i in range(1, depth):
mix_hidden = mixed_layer(name='hidden'+str(i), mix_hidden = mixed_layer(
size=hidden_dim, name='hidden' + str(i),
bias_attr=std_default, size=hidden_dim,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr), bias_attr=std_default,
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr) input=[
] full_matrix_projection(
) input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
lstm = lstmemory(name='lstm'+str(i), input=input_tmp[1], param_attr=lstm_para_attr)
input=mix_hidden, ])
act=ReluActivation(),
gate_act=SigmoidActivation(), lstm = lstmemory(
state_act=SigmoidActivation(), name='lstm' + str(i),
reverse=((i % 2)==1), input=mix_hidden,
bias_attr=std_0, act=ReluActivation(),
param_attr=lstm_para_attr) gate_act=SigmoidActivation(),
state_act=SigmoidActivation(),
reverse=((i % 2) == 1),
bias_attr=std_0,
param_attr=lstm_para_attr)
input_tmp = [mix_hidden, lstm] input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output', feature_out = mixed_layer(
size=label_dict_len, name='output',
bias_attr=std_default, size=label_dict_len,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr), bias_attr=std_default,
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr) input=[
], full_matrix_projection(
) input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict: if not is_predict:
crf_l = crf_layer( name = 'crf', crf_l = crf_layer(
size = label_dict_len, name='crf',
input = feature_out, size=label_dict_len,
label = target, input=feature_out,
param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr) label=target,
param_attr=ParameterAttribute(
) name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l', name='crf_dec_l',
size = label_dict_len, size=label_dict_len,
input = feature_out, input=feature_out,
label = target, label=target,
param_attr=ParameterAttribute(name='crfw') param_attr=ParameterAttribute(name='crfw'))
)
eval = sum_evaluator(input=crf_dec_l) eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l) outputs(crf_l)
else: else:
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l', crf_dec_l = crf_decoding_layer(
size = label_dict_len, name='crf_dec_l',
input = feature_out, size=label_dict_len,
param_attr=ParameterAttribute(name='crfw') input=feature_out,
) param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l) outputs(crf_dec_l)

@ -26,7 +26,8 @@ UNK_IDX = 0
class Prediction(): class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file): def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
""" """
train_conf: trainer configure. train_conf: trainer configure.
dict_file: word dictionary file name. dict_file: word dictionary file name.
@ -35,7 +36,7 @@ class Prediction():
self.dict = {} self.dict = {}
self.labels = {} self.labels = {}
self.predicate_dict={} self.predicate_dict = {}
self.labels_reverse = {} self.labels_reverse = {}
self.load_dict_label(dict_file, label_file, predicate_dict_file) self.load_dict_label(dict_file, label_file, predicate_dict_file)
@ -44,25 +45,18 @@ class Prediction():
len_pred = len(self.predicate_dict) len_pred = len(self.predicate_dict)
conf = parse_config( conf = parse_config(
train_conf, train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
'dict_len=' + str(len_dict) + str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto( self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config) conf.model_config)
self.network.loadParameters(model_dir) self.network.loadParameters(model_dir)
slots = [ slots = [
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_pred), integer_value_sequence(2)
integer_value_sequence(len_dict), ]
integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots) self.converter = DataProviderConverter(slots)
def load_dict_label(self, dict_file, label_file, predicate_dict_file): def load_dict_label(self, dict_file, label_file, predicate_dict_file):
@ -78,6 +72,7 @@ class Prediction():
for line_count, line in enumerate(open(predicate_dict_file, 'r')): for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file): def get_data(self, data_file):
""" """
Get input data of paddle format. Get input data of paddle format.
@ -88,9 +83,10 @@ class Prediction():
).split('\t') ).split('\t')
words = sentence.split() words = sentence.split()
sen_len = len(words) sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words] word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
@ -99,7 +95,7 @@ class Prediction():
marks = mark.split() marks = mark.split()
mark_slot = [int(w) for w in marks] mark_slot = [int(w) for w in marks]
yield word_slot, ctx_n2_slot, ctx_n1_slot, \ yield word_slot, ctx_n2_slot, ctx_n1_slot, \
ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot ctx_0_slot, ctx_p1_slot, ctx_p2_slot, predicate_slot, mark_slot
@ -123,8 +119,9 @@ class Prediction():
def option_parser(): def option_parser():
usage = ("python predict.py -c config -w model_dir " usage = (
"-d word dictionary -l label_file -i input_file -p pred_dict_file") "python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage) parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option( parser.add_option(
"-c", "-c",
@ -187,8 +184,9 @@ def main():
output_file = options.output_file output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0") swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file) predict = Prediction(train_conf, dict_file, model_path, label_file,
predict.predict(data_file,output_file) predict_dict_file)
predict.predict(data_file, output_file)
if __name__ == '__main__': if __name__ == '__main__':

@ -71,9 +71,7 @@ class SentimentPrediction():
transform word into integer index according to the dictionary. transform word into integer index according to the dictionary.
""" """
words = data.strip().split() words = data.strip().split()
word_slot = [ word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
self.word_dict[w] for w in words if w in self.word_dict
]
return word_slot return word_slot
def batch_predict(self, data_batch): def batch_predict(self, data_batch):
@ -85,8 +83,8 @@ class SentimentPrediction():
if self.label is None: if self.label is None:
print("predicting label is %d" % (lab[0])) print("predicting label is %d" % (lab[0]))
else: else:
print("predicting label is %s" % print("predicting label is %s" % (self.label[lab[0]]))
(self.label[lab[0]]))
def option_parser(): def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file " usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
@ -143,9 +141,10 @@ def main():
batch.append([predict.get_index(line)]) batch.append([predict.get_index(line)])
if len(batch) == batch_size: if len(batch) == batch_size:
predict.batch_predict(batch) predict.batch_predict(batch)
batch=[] batch = []
if len(batch) > 0: if len(batch) > 0:
predict.batch_predict(batch) predict.batch_predict(batch)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

@ -306,4 +306,4 @@ I1116 09:10:18.019069 50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:
I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165 I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164 I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165 I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
``` ```

@ -40,4 +40,4 @@ spec:
- name: jobpath - name: jobpath
mountPath: /home/jobpath mountPath: /home/jobpath
restartPolicy: Never restartPolicy: Never

@ -19,7 +19,6 @@ import socket
import os import os
import argparse import argparse
# configuration for cluster # configuration for cluster
API = "/api/v1/namespaces/" API = "/api/v1/namespaces/"
JOBSELECTOR = "labelSelector=job-name=" JOBSELECTOR = "labelSelector=job-name="
@ -145,8 +144,8 @@ def startPaddle(idMap={}, train_args_dict=None):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(prog="start_paddle.py", parser = argparse.ArgumentParser(
description='simple tool for k8s') prog="start_paddle.py", description='simple tool for k8s')
args, train_args_list = parser.parse_known_args() args, train_args_list = parser.parse_known_args()
train_args = refine_unknown_args(train_args_list) train_args = refine_unknown_args(train_args_list)
train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2])) train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))

@ -1,8 +1,8 @@
情感分析教程 情感分析教程
=========================== ===========================
.. toctree:: .. toctree::
:maxdepth: 3 :maxdepth: 3
:glob: :glob:
Training Locally <sentiment_analysis.md> Training Locally <sentiment_analysis.md>

@ -28,4 +28,4 @@ $(document).ready(function(){
$('.doc-menu-vertical').find('li.current').last().addClass('active'); $('.doc-menu-vertical').find('li.current').last().addClass('active');
$('.doc-menu-vertical').perfectScrollbar(); $('.doc-menu-vertical').perfectScrollbar();
}); });

@ -15,8 +15,8 @@ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include "PaddleAPIPrivate.h" #include "PaddleAPIPrivate.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
#include "Internal.h" #include "Internal.h"
#include "paddle/gserver/gradientmachines/NeuralNetwork.h"
std::vector<int> GradientMachine::defaultParamTypes = { std::vector<int> GradientMachine::defaultParamTypes = {
PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM}; PARAMETER_VALUE, PARAMETER_GRADIENT, PARAMETER_MOMENTUM};

@ -16,14 +16,13 @@ limitations under the License. */
#include "PaddleAPI.h" #include "PaddleAPI.h"
#include <vector>
#include <algorithm> #include <algorithm>
#include <vector>
template <typename T1, typename T2> template <typename T1, typename T2>
void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) { void staticCastVector(std::vector<T2>* dest, const std::vector<T1>& src) {
dest->resize(src.size()); dest->resize(src.size());
std::transform(src.begin(), std::transform(src.begin(), src.end(), dest->begin(), [](T1 t) {
src.end(), return static_cast<T2>(t);
dest->begin(), });
[](T1 t) { return static_cast<T2>(t); });
} }

@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "PaddleAPI.h"
#include "paddle/math/Matrix.h" #include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/CpuSparseMatrix.h"
#include <iostream>
#include <cstring> #include <cstring>
#include <iostream>
#include "PaddleAPI.h"
#include "paddle/math/CpuSparseMatrix.h"
#include "paddle/math/SparseMatrix.h"
struct MatrixPrivate { struct MatrixPrivate {
std::shared_ptr<paddle::Matrix> mat; std::shared_ptr<paddle::Matrix> mat;

@ -16,8 +16,8 @@ limitations under the License. */
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <string>
#include <stdexcept> #include <stdexcept>
#include <string>
#include <vector> #include <vector>
#include "paddle/utils/GlobalConstants.h" #include "paddle/utils/GlobalConstants.h"
#include "paddle/utils/TypeDefs.h" #include "paddle/utils/TypeDefs.h"

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save