Resolve conflict

avx_docs
Yi Wang 9 years ago
commit f9f2741fd0

@ -8,10 +8,13 @@ os:
env:
- JOB=DOCS
- JOB=BUILD_AND_TEST
- JOB=PRE_COMMIT
matrix:
exclude:
- os: osx
env: JOB=DOCS # Only generate documentation in linux
env: JOB=DOCS # Only generate documentation in linux.
- os: osx
env: JOB=PRE_COMMIT # Only check pre-commit hook in linux
addons:
apt:
@ -39,18 +42,23 @@ addons:
- lcov
- graphviz
- swig
- clang-format-3.8
before_install:
- |
if [ ${JOB} == "BUILD_AND_TEST" ]; then
if ! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
local change_list=`git diff --name-only $TRAVIS_COMMIT_RANGE`
if [ $? -eq 0 ]; then # if git diff returns non-zero, then rerun the unit tests.
if ! echo ${change_list} | grep -qvE '(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)'
then
echo "Only markdown docs were updated, stopping build process."
exit
fi
fi
fi
- if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then sudo paddle/scripts/travis/before_install.linux.sh; fi
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then paddle/scripts/travis/before_install.osx.sh; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme
- if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
- pip install wheel protobuf sphinx recommonmark virtualenv numpy sphinx_rtd_theme pre-commit
script:
- paddle/scripts/travis/main.sh
notifications:
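To make the intent of the new change-list check explicit, here is a rough, hypothetical Python equivalent of the grep filter above (not part of the commit; `TRAVIS_COMMIT_RANGE` is the commit range Travis CI exports for the build):

```python
import re
import subprocess

# Same extension filter as the grep -qvE pattern in .travis.yml above.
DOC_ONLY = re.compile(r'(\.md$)|(\.rst$)|(\.jpg$)|(\.png$)')

def only_docs_changed(commit_range):
    """Return True when every changed file in the range is documentation.

    Mirrors `! git diff --name-only $TRAVIS_COMMIT_RANGE | grep -qvE ...`:
    an empty change list also counts as docs-only, just like the grep check.
    """
    out = subprocess.check_output(
        ['git', 'diff', '--name-only', commit_range]).decode()
    paths = [p for p in out.splitlines() if p]
    return all(DOC_ONLY.search(p) for p in paths)
```

With such a helper, the `BUILD_AND_TEST` job could exit early whenever it returns True for the Travis-provided commit range.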

@ -3,8 +3,7 @@ http_archive(
name="protobuf",
url="http://github.com/google/protobuf/archive/v3.1.0.tar.gz",
sha256="0a0ae63cbffc274efb573bdde9a253e3f32e458c41261df51c5dbc5ad541e8f7",
strip_prefix = "protobuf-3.1.0",
)
strip_prefix="protobuf-3.1.0", )
# External dependency to gtest 1.7.0. This method comes from
# https://www.bazel.io/versions/master/docs/tutorial/cpp.html.

@ -25,4 +25,3 @@ test 4 2 256 512
test 4 2 512 128
test 4 2 512 256
test 4 2 512 512

@ -15,4 +15,3 @@ set -e
wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
tar zxf cifar-10-python.tar.gz
rm cifar-10-python.tar.gz

@ -15,5 +15,3 @@ do
gunzip ${fname}.gz
fi
done

@ -14,10 +14,9 @@
from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@ -38,8 +37,8 @@ sample_dim = 2
settings(
batch_size=128,
learning_rate=1e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
learning_method=AdamOptimizer(beta1=0.5))
def discriminator(sample):
"""
@ -50,71 +49,88 @@ def discriminator(sample):
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0)
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
hidden = fc_layer(
input=sample,
name="dis_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
hidden2 = fc_layer(
input=hidden,
name="dis_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="dis_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
param_attr=ParamAttr(
is_static=is_generator_training, initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn, name="dis_prob", size=2,
return fc_layer(
input=hidden_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0)
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0)
hidden = fc_layer(input=noise,
hidden = fc_layer(
input=noise,
name="gen_layer_hidden",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=ReluActivation())
hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
hidden2 = fc_layer(
input=hidden,
name="gen_hidden2",
size=hidden_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
hidden_bn = batch_norm_layer(hidden2,
hidden_bn = batch_norm_layer(
hidden2,
act=ReluActivation(),
name="gen_layer_hidden_bn",
bias_attr=bias_attr,
param_attr=ParamAttr(is_static=is_discriminator_training,
param_attr=ParamAttr(
is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02),
use_global_stats=False)
return fc_layer(input=hidden_bn,
return fc_layer(
input=hidden_bn,
name="gen_layer1",
size=sample_dim,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
sample = generator(noise)
@ -126,7 +142,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:

@ -15,10 +15,9 @@ from paddle.trainer_config_helpers import *
mode = get_config_arg("mode", str, "generator")
dataSource = get_config_arg("data", str, "mnist")
assert mode in set(["generator",
"discriminator",
"generator_training",
"discriminator_training"])
assert mode in set([
"generator", "discriminator", "generator_training", "discriminator_training"
])
is_generator_training = mode == "generator_training"
is_discriminator_training = mode == "discriminator_training"
@ -47,13 +46,22 @@ s8, s16 = int(sample_dim/8), int(sample_dim/16)
settings(
batch_size=128,
learning_rate=2e-4,
learning_method=AdamOptimizer(beta1=0.5)
)
def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
param_attr, bias_attr, param_attr_bn, bn, trans=False,
learning_method=AdamOptimizer(beta1=0.5))
def conv_bn(input,
channels,
imgSize,
num_filters,
output_x,
stride,
name,
param_attr,
bias_attr,
param_attr_bn,
bn,
trans=False,
act=ReluActivation()):
"""
conv_bn is a utility function that constructs a convolution/deconv layer
with an optional batch_norm layer
@ -84,16 +92,27 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
nameApx = "_convt"
if bn:
conv = img_conv_layer(input, filter_size=filter_size,
conv = img_conv_layer(
input,
filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=LinearActivation(), groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
name=name + nameApx,
num_channels=channels,
act=LinearActivation(),
groups=1,
stride=stride,
padding=padding,
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
trans=trans)
conv_bn = batch_norm_layer(conv,
conv_bn = batch_norm_layer(
conv,
act=act,
name=name + nameApx + "_bn",
bias_attr=bias_attr,
@ -102,46 +121,57 @@ def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
return conv_bn
else:
conv = img_conv_layer(input, filter_size=filter_size,
conv = img_conv_layer(
input,
filter_size=filter_size,
num_filters=num_filters,
name=name + nameApx, num_channels=channels,
act=act, groups=1, stride=stride,
padding=padding, bias_attr=bias_attr,
param_attr=param_attr, shared_biases=True, layer_attr=None,
filter_size_y=None, stride_y=None, padding_y=None,
name=name + nameApx,
num_channels=channels,
act=act,
groups=1,
stride=stride,
padding=padding,
bias_attr=bias_attr,
param_attr=param_attr,
shared_biases=True,
layer_attr=None,
filter_size_y=None,
stride_y=None,
padding_y=None,
trans=trans)
return conv
def generator(noise):
"""
generator generates a sample given noise
"""
param_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_discriminator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_discriminator_training,
initial_mean=1.0,
initial_std=0.02)
h1 = fc_layer(input=noise,
param_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.02)
bias_attr = ParamAttr(
is_static=is_discriminator_training, initial_mean=0.0, initial_std=0.0)
param_attr_bn = ParamAttr(
is_static=is_discriminator_training, initial_mean=1.0, initial_std=0.02)
h1 = fc_layer(
input=noise,
name="gen_layer_h1",
size=s8 * s8 * gf_dim * 4,
bias_attr=bias_attr,
param_attr=param_attr,
act=LinearActivation())
h1_bn = batch_norm_layer(h1,
h1_bn = batch_norm_layer(
h1,
act=ReluActivation(),
name="gen_layer_h1_bn",
bias_attr=bias_attr,
param_attr=param_attr_bn,
use_global_stats=False)
h2_bn = conv_bn(h1_bn,
h2_bn = conv_bn(
h1_bn,
channels=gf_dim * 4,
output_x=s8,
num_filters=gf_dim * 2,
@ -154,7 +184,8 @@ def generator(noise):
bn=True,
trans=True)
h3_bn = conv_bn(h2_bn,
h3_bn = conv_bn(
h2_bn,
channels=gf_dim * 2,
output_x=s4,
num_filters=gf_dim,
@ -167,8 +198,8 @@ def generator(noise):
bn=True,
trans=True)
return conv_bn(h3_bn,
return conv_bn(
h3_bn,
channels=gf_dim,
output_x=s2,
num_filters=c_dim,
@ -191,18 +222,16 @@ def discriminator(sample):
of the sample is from the generator and dimension 1 is the probability
of the sample is from real data.
"""
param_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.02)
bias_attr = ParamAttr(is_static=is_generator_training,
initial_mean=0.0,
initial_std=0.0)
param_attr_bn=ParamAttr(is_static=is_generator_training,
initial_mean=1.0,
initial_std=0.02)
h0 = conv_bn(sample,
param_attr = ParamAttr(
is_static=is_generator_training, initial_mean=0.0, initial_std=0.02)
bias_attr = ParamAttr(
is_static=is_generator_training, initial_mean=0.0, initial_std=0.0)
param_attr_bn = ParamAttr(
is_static=is_generator_training, initial_mean=1.0, initial_std=0.02)
h0 = conv_bn(
sample,
channels=c_dim,
imgSize=sample_dim,
num_filters=df_dim,
@ -214,7 +243,8 @@ def discriminator(sample):
param_attr_bn=param_attr_bn,
bn=False)
h1_bn = conv_bn(h0,
h1_bn = conv_bn(
h0,
channels=df_dim,
imgSize=s2,
num_filters=df_dim * 2,
@ -226,7 +256,8 @@ def discriminator(sample):
param_attr_bn=param_attr_bn,
bn=True)
h2_bn = conv_bn(h1_bn,
h2_bn = conv_bn(
h1_bn,
channels=df_dim * 2,
imgSize=s4,
num_filters=df_dim * 4,
@ -238,13 +269,15 @@ def discriminator(sample):
param_attr_bn=param_attr_bn,
bn=True)
return fc_layer(input=h2_bn, name="dis_prob", size=2,
return fc_layer(
input=h2_bn,
name="dis_prob",
size=2,
bias_attr=bias_attr,
param_attr=param_attr,
act=SoftmaxActivation())
if is_generator_training:
noise = data_layer(name="noise", size=noise_dim)
sample = generator(noise)
@ -256,7 +289,8 @@ if is_generator_training or is_discriminator_training:
label = data_layer(name="label", size=1)
prob = discriminator(sample)
cost = cross_entropy(input=prob, label=label)
classification_error_evaluator(input=prob, label=label, name=mode+'_error')
classification_error_evaluator(
input=prob, label=label, name=mode + '_error')
outputs(cost)
if is_generator:

@ -24,6 +24,7 @@ from paddle.trainer.config_parser import logger
import py_paddle.swig_paddle as api
import matplotlib.pyplot as plt
def plot2DScatter(data, outputfile):
'''
Plot the data as a 2D scatter plot and save to outputfile
@ -41,9 +42,11 @@ def plot2DScatter(data, outputfile):
plt.scatter(x, y)
plt.savefig(outputfile, bbox_inches='tight')
def CHECK_EQ(a, b):
assert a == b, "a=%s, b=%s" % (a, b)
def copy_shared_parameters(src, dst):
'''
copy the parameters from src to dst
@ -52,11 +55,9 @@ def copy_shared_parameters(src, dst):
:param dst: the destination of the parameters
:type dst: GradientMachine
'''
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
src_params = dict([(p.getName(), p) for p in src_params])
for i in xrange(dst.getParameterSize()):
dst_param = dst.getParameter(i)
src_param = src_params.get(dst_param.getName(), None)
@ -68,14 +69,16 @@ def copy_shared_parameters(src, dst):
dst_value.copyFrom(src_value)
dst_param.setValueUpdated()
def print_parameters(src):
src_params = [src.getParameter(i)
for i in xrange(src.getParameterSize())]
src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
print "***************"
for p in src_params:
print "Name is %s" % p.getName()
print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray(
)
def load_mnist_data(imageFile):
f = open(imageFile, "rb")
@ -93,6 +96,7 @@ def load_mnist_data(imageFile):
f.close()
return data.astype('float32')
def load_cifar_data(cifar_path):
batch_size = 10000
data = numpy.zeros((5 * batch_size, 32 * 32 * 3), dtype="float32")
@ -106,11 +110,13 @@ def load_cifar_data(cifar_path):
data = data / 255.0 * 2.0 - 1.0
return data
# synthesize 2-D uniform data
def load_uniform_data():
data = numpy.random.rand(1000000, 2).astype('float32')
return data
def merge(images, size):
if images.shape[1] == 28 * 28:
h, w, c = 28, 28, 1
@ -124,6 +130,7 @@ def merge(images, size):
((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
return img.astype('uint8')
def save_images(images, path):
merged_img = merge(images, [8, 8])
if merged_img.shape[2] == 1:
@ -132,13 +139,16 @@ def save_images(images, path):
im = Image.fromarray(merged_img, mode="RGB")
im.save(path)
def get_real_samples(batch_size, data_np):
return data_np[numpy.random.choice(data_np.shape[0], batch_size,
replace=False),:]
return data_np[numpy.random.choice(
data_np.shape[0], batch_size, replace=False), :]
def get_noise(batch_size, noise_dim):
return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
def get_fake_samples(generator_machine, batch_size, noise):
gen_inputs = api.Arguments.createArguments(1)
gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
@ -147,12 +157,14 @@ def get_fake_samples(generator_machine, batch_size, noise):
fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
return fake_samples
def get_training_loss(training_machine, inputs):
outputs = api.Arguments.createArguments(0)
training_machine.forward(inputs, outputs, api.PASS_TEST)
loss = outputs.getSlotValue(0).copyToNumpyMat()
return numpy.mean(loss)
def prepare_discriminator_data_batch_pos(batch_size, data_np):
real_samples = get_real_samples(batch_size, data_np)
labels = numpy.ones(batch_size, dtype='int32')
@ -161,6 +173,7 @@ def prepare_discriminator_data_batch_pos(batch_size, data_np):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
labels = numpy.zeros(batch_size, dtype='int32')
@ -169,6 +182,7 @@ def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
return inputs
def prepare_generator_data_batch(batch_size, noise):
label = numpy.ones(batch_size, dtype='int32')
inputs = api.Arguments.createArguments(2)
@ -193,10 +207,9 @@ def get_layer_size(model_conf, layer_name):
def main():
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
parser.add_argument("--use_gpu", default="1",
help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0",
help="the gpu_id parameter")
parser.add_argument(
"--use_gpu", default="1", help="1 means use gpu for training")
parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
args = parser.parse_args()
data_source = args.data_source
use_gpu = args.use_gpu
@ -209,8 +222,9 @@ def main():
if not os.path.exists("./%s_params/" % data_source):
os.makedirs("./%s_params/" % data_source)
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
'--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
'--log_period=100', '--gpu_id=' + args.gpu_id,
'--save_dir=' + "./%s_params/" % data_source)
if data_source == "uniform":
conf = "gan_conf.py"
@ -220,7 +234,8 @@ def main():
num_iter = 1000
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
dis_conf = parse_config(conf,
"mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
batch_size = dis_conf.opt_config.batch_size
noise_dim = get_layer_size(gen_conf.model_config, "noise")
@ -245,11 +260,9 @@ def main():
generator_machine = api.GradientMachine.createFromConfigProto(
generator_conf.model_config)
dis_trainer = api.Trainer.create(
dis_conf, dis_training_machine)
dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
gen_trainer = api.Trainer.create(
gen_conf, gen_training_machine)
gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
dis_trainer.startTrain()
gen_trainer.startTrain()
@ -272,21 +285,23 @@ def main():
noise = get_noise(batch_size, noise_dim)
data_batch_dis_pos = prepare_discriminator_data_batch_pos(
batch_size, data_np)
dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
dis_loss_pos = get_training_loss(dis_training_machine,
data_batch_dis_pos)
data_batch_dis_neg = prepare_discriminator_data_batch_neg(
generator_machine, batch_size, noise)
dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
dis_loss_neg = get_training_loss(dis_training_machine,
data_batch_dis_neg)
dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
# Do forward pass in generator to get the gen_loss
data_batch_gen = prepare_generator_data_batch(
batch_size, noise)
data_batch_gen = prepare_generator_data_batch(batch_size, noise)
gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
if i % 100 == 0:
print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos,
dis_loss_neg)
print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
# Decide which network to train based on the training history
@ -300,7 +315,8 @@ def main():
curr_strike = 1
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
copy_shared_parameters(dis_training_machine, gen_training_machine)
copy_shared_parameters(dis_training_machine,
gen_training_machine)
else:
if curr_train == "gen":
@ -311,7 +327,8 @@ def main():
gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
# TODO: add API for paddle to allow true parameter sharing between different GradientMachines
# so that we do not need to copy shared parameters.
copy_shared_parameters(gen_training_machine, dis_training_machine)
copy_shared_parameters(gen_training_machine,
dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
dis_trainer.finishTrainPass()
@ -319,11 +336,14 @@ def main():
# At the end of each pass, save the generated samples/images
fake_samples = get_fake_samples(generator_machine, batch_size, noise)
if data_source == "uniform":
plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" %
(data_source, train_pass))
else:
save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
save_images(fake_samples, "./%s_samples/train_pass%s.png" %
(data_source, train_pass))
dis_trainer.finishTrain()
gen_trainer.finishTrain()
if __name__ == '__main__':
main()

@ -13,7 +13,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This configuration is a demonstration of how to implement the stacked LSTM
with residual connections, i.e. an LSTM layer takes the sum of the hidden states
@ -46,7 +45,8 @@ is_predict = get_config_arg('is_predict', bool, False)
trn = 'data/train.list' if not is_predict else None
tst = 'data/test.list' if not is_predict else 'data/pred.list'
process = 'process' if not is_predict else 'process_predict'
define_py_data_sources2(train_list=trn,
define_py_data_sources2(
train_list=trn,
test_list=tst,
module="dataprovider_emb",
obj=process,
@ -58,8 +58,7 @@ settings(
learning_rate=2e-3,
learning_method=AdamOptimizer(),
regularization=L2Regularization(8e-4),
gradient_clipping_threshold=25
)
gradient_clipping_threshold=25)
bias_attr = ParamAttr(initial_std=0., l2_rate=0.)
@ -73,17 +72,15 @@ for i in range(3):
# The input to the current layer is the sum of the hidden state
# and input of the previous layer.
current_input = addto_layer(input=[previous_input, previous_hidden_state])
hidden_state = simple_lstm(input=current_input, size=128,
lstm_cell_attr=ExtraAttr(drop_rate=0.1))
hidden_state = simple_lstm(
input=current_input, size=128, lstm_cell_attr=ExtraAttr(drop_rate=0.1))
previous_input, previous_hidden_state = current_input, hidden_state
lstm = previous_hidden_state
lstm_last = pooling_layer(input=lstm, pooling_type=MaxPooling())
output = fc_layer(input=lstm_last, size=2,
bias_attr=bias_attr,
act=SoftmaxActivation())
output = fc_layer(
input=lstm_last, size=2, bias_attr=bias_attr, act=SoftmaxActivation())
if is_predict:
maxid = maxid_layer(output)

@ -69,7 +69,6 @@ def extract_dict_features(pair_file, feature_file):
feature_out.write(feature_str + '\n')
if __name__ == '__main__':
usage = '-p pair_file -f feature_file'

@ -30,8 +30,7 @@ def hook(settings, word_dict, label_dict, predicate_dict, **kwargs):
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(word_dict)),
integer_value_sequence(len(predicate_dict)),
integer_value_sequence(2),
integer_value_sequence(len(predicate_dict)), integer_value_sequence(2),
integer_value_sequence(len(label_dict))
]
@ -40,8 +39,12 @@ def get_batch_size(yeild_data):
return len(yeild_data[0])
@provider(init_hook=hook, should_shuffle=True, calc_batch_size=get_batch_size,
can_over_batch_size=False, cache=CacheType.CACHE_PASS_IN_MEM)
@provider(
init_hook=hook,
should_shuffle=True,
calc_batch_size=get_batch_size,
can_over_batch_size=False,
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, file_name):
with open(file_name, 'r') as fdata:
for line in fdata:

@ -47,7 +47,6 @@ if not is_predict:
w = line.strip()
predicate_dict[w] = i
if is_test:
train_list_file = None
@ -57,9 +56,11 @@ if not is_predict:
test_list=test_list_file,
module='dataprovider',
obj='process',
args={'word_dict': word_dict,
args={
'word_dict': word_dict,
'label_dict': label_dict,
'predicate_dict': predicate_dict })
'predicate_dict': predicate_dict
})
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
@ -77,24 +78,16 @@ mark_dim = 5
hidden_dim = 512
depth = 8
########################### Optimizer #######################################
settings(
batch_size=150,
learning_method=MomentumOptimizer(momentum=0),
learning_rate=2e-2,
regularization=L2Regularization(8e-4),
is_async=False,
model_average=ModelAverage(average_window=0.5,
max_average_window=10000),
)
model_average=ModelAverage(
average_window=0.5, max_average_window=10000), )
####################################### network ##############################
#8 features and 1 target
@ -108,22 +101,28 @@ ctx_p1 = data_layer(name='ctx_p1_data', size=word_dict_len)
ctx_p2 = data_layer(name='ctx_p2_data', size=word_dict_len)
mark = data_layer(name='mark_data', size=mark_dict_len)
if not is_predict:
target = data_layer(name='target', size=label_dict_len)
default_std = 1 / math.sqrt(hidden_dim) / 3.0
emb_para = ParameterAttribute(name='emb', initial_std=0., learning_rate=0.)
std_0 = ParameterAttribute(initial_std=0.)
std_default = ParameterAttribute(initial_std=default_std)
predicate_embedding = embedding_layer(size=word_dim, input=predicate, param_attr=ParameterAttribute(name='vemb',initial_std=default_std))
mark_embedding = embedding_layer(name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
predicate_embedding = embedding_layer(
size=word_dim,
input=predicate,
param_attr=ParameterAttribute(
name='vemb', initial_std=default_std))
mark_embedding = embedding_layer(
name='word_ctx-in_embedding', size=mark_dim, input=mark, param_attr=std_0)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [embedding_layer(size=word_dim, input=x, param_attr=emb_para) for x in word_input]
emb_layers = [
embedding_layer(
size=word_dim, input=x, param_attr=emb_para) for x in word_input
]
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
@ -131,14 +130,18 @@ hidden_0 = mixed_layer(
name='hidden0',
size=hidden_dim,
bias_attr=std_default,
input=[ full_matrix_projection(input=emb, param_attr=std_default ) for emb in emb_layers ])
input=[
full_matrix_projection(
input=emb, param_attr=std_default) for emb in emb_layers
])
mix_hidden_lr = 1e-3
lstm_para_attr = ParameterAttribute(initial_std=0.0, learning_rate=1.0)
hidden_para_attr = ParameterAttribute(initial_std=default_std, learning_rate=mix_hidden_lr)
hidden_para_attr = ParameterAttribute(
initial_std=default_std, learning_rate=mix_hidden_lr)
lstm_0 = lstmemory(name='lstm0',
lstm_0 = lstmemory(
name='lstm0',
input=hidden_0,
act=ReluActivation(),
gate_act=SigmoidActivation(),
@ -149,18 +152,21 @@ lstm_0 = lstmemory(name='lstm0',
#stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = mixed_layer(name='hidden'+str(i),
mix_hidden = mixed_layer(
name='hidden' + str(i),
size=hidden_dim,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
]
)
lstm = lstmemory(name='lstm'+str(i),
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
])
lstm = lstmemory(
name='lstm' + str(i),
input=mix_hidden,
act=ReluActivation(),
gate_act=SigmoidActivation(),
@ -171,44 +177,42 @@ for i in range(1, depth):
input_tmp = [mix_hidden, lstm]
feature_out = mixed_layer(name='output',
feature_out = mixed_layer(
name='output',
size=label_dict_len,
bias_attr=std_default,
input=[full_matrix_projection(input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(input=input_tmp[1], param_attr=lstm_para_attr)
],
)
input=[
full_matrix_projection(
input=input_tmp[0], param_attr=hidden_para_attr),
full_matrix_projection(
input=input_tmp[1], param_attr=lstm_para_attr)
], )
if not is_predict:
crf_l = crf_layer( name = 'crf',
crf_l = crf_layer(
name='crf',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw',initial_std=default_std, learning_rate=mix_hidden_lr)
)
param_attr=ParameterAttribute(
name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
label=target,
param_attr=ParameterAttribute(name='crfw')
)
param_attr=ParameterAttribute(name='crfw'))
eval = sum_evaluator(input=crf_dec_l)
outputs(crf_l)
else:
crf_dec_l = crf_decoding_layer(name = 'crf_dec_l',
crf_dec_l = crf_decoding_layer(
name='crf_dec_l',
size=label_dict_len,
input=feature_out,
param_attr=ParameterAttribute(name='crfw')
)
param_attr=ParameterAttribute(name='crfw'))
outputs(crf_dec_l)

@ -26,7 +26,8 @@ UNK_IDX = 0
class Prediction():
def __init__(self, train_conf, dict_file, model_dir, label_file, predicate_dict_file):
def __init__(self, train_conf, dict_file, model_dir, label_file,
predicate_dict_file):
"""
train_conf: trainer configure.
dict_file: word dictionary file name.
@ -44,24 +45,17 @@ class Prediction():
len_pred = len(self.predicate_dict)
conf = parse_config(
train_conf,
'dict_len=' + str(len_dict) +
',label_len=' + str(len_label) +
',pred_len=' + str(len_pred) +
',is_predict=True')
train_conf, 'dict_len=' + str(len_dict) + ',label_len=' +
str(len_label) + ',pred_len=' + str(len_pred) + ',is_predict=True')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
slots = [
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_dict),
integer_value_sequence(len_pred),
integer_value_sequence(2)
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_dict), integer_value_sequence(len_dict),
integer_value_sequence(len_pred), integer_value_sequence(2)
]
self.converter = DataProviderConverter(slots)
@ -78,6 +72,7 @@ class Prediction():
for line_count, line in enumerate(open(predicate_dict_file, 'r')):
self.predicate_dict[line.strip()] = line_count
def get_data(self, data_file):
"""
Get input data of paddle format.
@ -90,7 +85,8 @@ class Prediction():
sen_len = len(words)
word_slot = [self.dict.get(w, UNK_IDX) for w in words]
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)] * sen_len
predicate_slot = [self.predicate_dict.get(predicate, UNK_IDX)
] * sen_len
ctx_n2_slot = [self.dict.get(ctx_n2, UNK_IDX)] * sen_len
ctx_n1_slot = [self.dict.get(ctx_n1, UNK_IDX)] * sen_len
ctx_0_slot = [self.dict.get(ctx_0, UNK_IDX)] * sen_len
@ -123,7 +119,8 @@ class Prediction():
def option_parser():
usage = ("python predict.py -c config -w model_dir "
usage = (
"python predict.py -c config -w model_dir "
"-d word dictionary -l label_file -i input_file -p pred_dict_file")
parser = OptionParser(usage="usage: %s [options]" % usage)
parser.add_option(
@ -187,7 +184,8 @@ def main():
output_file = options.output_file
swig_paddle.initPaddle("--use_gpu=0")
predict = Prediction(train_conf, dict_file, model_path, label_file, predict_dict_file)
predict = Prediction(train_conf, dict_file, model_path, label_file,
predict_dict_file)
predict.predict(data_file, output_file)

@ -71,9 +71,7 @@ class SentimentPrediction():
transform word into integer index according to the dictionary.
"""
words = data.strip().split()
word_slot = [
self.word_dict[w] for w in words if w in self.word_dict
]
word_slot = [self.word_dict[w] for w in words if w in self.word_dict]
return word_slot
def batch_predict(self, data_batch):
@ -85,8 +83,8 @@ class SentimentPrediction():
if self.label is None:
print("predicting label is %d" % (lab[0]))
else:
print("predicting label is %s" %
(self.label[lab[0]]))
print("predicting label is %s" % (self.label[lab[0]]))
def option_parser():
usage = "python predict.py -n config -w model_dir -d dictionary -i input_file "
@ -147,5 +145,6 @@ def main():
if len(batch) > 0:
predict.batch_predict(batch)
if __name__ == '__main__':
main()

@ -1,4 +1,4 @@
.. _api_pydataprovider:
.. _api_pydataprovider2_en:
PyDataProvider2
===============
@ -104,6 +104,8 @@ And PaddlePadle will do all of the rest things\:
Is this cool?
.. _api_pydataprovider2_en_sequential_model:
DataProvider for the sequential model
-------------------------------------
A sequence model takes sequences as its input. A sequence is made up of several

@ -23,7 +23,7 @@ python's :code:`help()` function. Let's walk through the above python script:
* At the beginning, use :code:`swig_paddle.initPaddle()` to initialize
PaddlePaddle with command line arguments, for more about command line arguments
see `Command Line Arguments <../cmd_argument/detail_introduction.html>`_.
see :ref:`cmd_detail_introduction_en` .
* Parse the configuration file that is used in training with :code:`parse_config()`.
Because data to predict with always have no label, and output of prediction work
normally is the output layer rather than the cost layer, so you should modify
@ -36,7 +36,7 @@ python's :code:`help()` function. Let's walk through the above python script:
- Note: As swig_paddle can only accept C++ matrices, we offer a utility
class DataProviderConverter that can accept the same input data with
PyDataProvider2, for more information please refer to document
of `PyDataProvider2 <../data_provider/pydataprovider2.html>`_.
of :ref:`api_pydataprovider2_en` .
* Do the prediction with :code:`forwardTest()`, which takes the converted
input data and outputs the activations of the output layer.

@ -1,3 +1,5 @@
.. _api_trainer_config_helpers_layers:
======
Layers
======

@ -99,11 +99,3 @@ In PaddlePaddle, training is just to get a collection of model parameters, which
Although it starts from a random guess, you can see that the value of ``w`` changes quickly towards 2 and ``b`` changes quickly towards 0.3. In the end, the predicted line is almost identical to the real answer.
There, you have recovered the underlying pattern between ``X`` and ``Y`` only from observed data.
5. Where to Go from Here
-------------------------
- `Install and Build <../build_and_install/index.html>`_
- `Tutorials <../demo/quick_start/index_en.html>`_
- `Example and Demo <../demo/index.html>`_

@ -14,6 +14,13 @@ cd paddle
git submodule update --init --recursive
```
If you already have a local PaddlePaddle repo and have not initialized the submodule, your local submodule folder will be empty. You can simply run the last of the commands above in your PaddlePaddle home directory to initialize your submodule folder.
If you have already initialized your submodule and would like to sync with the upstream submodule repo, you can run the following command:
```
git submodule update --remote
```
## <span id="requirements">Requirements</span>
To compile the source code, your computer must be equipped with the following dependencies.

@ -122,9 +122,9 @@ The general development workflow with Docker and Bazel is as follows:
git clone --recursive https://github.com/paddlepaddle/paddle
2. Build a development Docker image `paddle:dev` from the source code.
This image contains all the development tools and dependencies of
PaddlePaddle.
2. Build a development Docker image :code:`paddle:dev` from the source
code. This image contains all the development tools and
dependencies of PaddlePaddle.
.. code-block:: bash
@ -140,13 +140,21 @@ The general development workflow with Docker and Bazel is as follows:
.. code-block:: bash
docker run \
-d # run the container in background mode \
--name paddle # we can run a nginx container to serve documents \
-p 2022:22 # so we can SSH into this container \
-v $PWD:/paddle # mount the source code \
-v $HOME/.cache/bazel:/root/.cache/bazel # mount Bazel cache \
-d \
--name paddle \
-p 2022:22 \
-v $PWD:/paddle \
-v $HOME/.cache/bazel:/root/.cache/bazel \
paddle:dev
where :code:`-d` makes the container run in the background,
:code:`--name paddle` names the container, which lets us run an nginx
container to serve documents from it, :code:`-p 2022:22` allows us to
SSH into this container, :code:`-v $PWD:/paddle` shares the source code
on the host with the container, and :code:`-v
$HOME/.cache/bazel:/root/.cache/bazel` shares the Bazel cache on the
host with the container.
4. SSH into the container:
.. code-block:: bash

@ -1,3 +1,7 @@
```eval_rst
.. _cmd_detail_introduction_en:
```
# Detail Description
## Common

@ -30,7 +30,7 @@ Then at the :code:`process` function, each :code:`yield` function will return th
yield src_ids, trg_ids, trg_ids_next
For more details description of how to write a data provider, please refer to `PyDataProvider2 <../../ui/data_provider/index.html>`_. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
For a more detailed description of how to write a data provider, please refer to :ref:`api_pydataprovider2_en`. The full data provider file is located at :code:`demo/seqToseq/dataprovider.py`.
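As a rough, hypothetical sketch of what such a provider can look like (borrowing the :code:`@provider` decorator and :code:`integer_value_sequence` input types that appear elsewhere in this diff; the vocabulary size, end-of-sequence id, and file format are placeholders, not values from the demo):

```python
from paddle.trainer.PyDataProvider2 import provider, integer_value_sequence

DICT_SIZE = 30000  # placeholder vocabulary size
EOS_ID = 1         # placeholder end-of-sequence token id

@provider(input_types=[
    integer_value_sequence(DICT_SIZE),  # src_ids
    integer_value_sequence(DICT_SIZE),  # trg_ids
    integer_value_sequence(DICT_SIZE),  # trg_ids_next
])
def process(settings, file_name):
    # Assume each line holds a tab-separated pair of token-id sequences.
    with open(file_name) as f:
        for line in f:
            src, trg = line.strip().split('\t')
            src_ids = [int(tok) for tok in src.split()]
            trg_ids = [int(tok) for tok in trg.split()]
            trg_ids_next = trg_ids[1:] + [EOS_ID]
            yield src_ids, trg_ids, trg_ids_next
```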
===============================================
Configure Recurrent Neural Network Architecture
@ -106,7 +106,7 @@ We will use the sequence to sequence model with attention as an example to demon
In this model, the source sequence :math:`S = \{s_1, \dots, s_T\}` is encoded with a bidirectional gated recurrent neural network. The hidden states of the bidirectional gated recurrent neural network :math:`H_S = \{H_1, \dots, H_T\}` are called the *encoder vector*. The decoder is a gated recurrent neural network. When decoding each token :math:`y_t`, the gated recurrent neural network generates a set of weights :math:`W_S^t = \{W_1^t, \dots, W_T^t\}`, which are used to compute a weighted sum of the encoder vector. The weighted sum of the encoder vector is utilized to condition the generation of the token :math:`y_t`.
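As a sketch of that weighted sum in the same notation (with :math:`c_t` as a hypothetical name for the resulting context vector):

.. math::

    c_t = \sum_{i=1}^{T} W_i^t H_i, \qquad \sum_{i=1}^{T} W_i^t = 1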
The encoder part of the model is listed below. It calls :code:`grumemory` to represent gated recurrent neural network. It is the recommended way of using recurrent neural network if the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures, you can refer to `Layers <../../ui/api/trainer_config_helpers/layers_index.html>`_ for more details.
The encoder part of the model is listed below. It calls :code:`grumemory` to represent a gated recurrent neural network. This is the recommended way of using a recurrent neural network when the network architecture is simple, because it is faster than :code:`recurrent_group`. We have implemented most of the commonly used recurrent neural network architectures; you can refer to :ref:`api_trainer_config_helpers_layers` for more details.
We also project the encoder vector to :code:`decoder_size` dimensional space, get the first instance of the backward recurrent network, and project it to :code:`decoder_size` dimensional space:
@ -246,6 +246,6 @@ The code is listed below:
outputs(beam_gen)
Notice that this generation technique is only useful for decoder like generation process. If you are working on sequence tagging tasks, please refer to `Semantic Role Labeling Demo <../../demo/semantic_role_labeling/index.html>`_ for more details.
Notice that this generation technique is only useful for decoder-like generation processes. If you are working on sequence tagging tasks, please refer to :ref:`semantic_role_labeling_en` for more details.
The full configuration file is located at :code:`demo/seqToseq/seqToseq_net.py`.

@ -51,7 +51,7 @@ In this tutorial, we will focus on nvprof and nvvp.
:code:`test_GpuProfiler` from :code:`paddle/math/tests` directory will be used to evaluate
above profilers.
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:linenos:
@ -77,7 +77,7 @@ As a simple example, consider the following:
1. Add :code:`REGISTER_TIMER_INFO` and :code:`printAllStatus` functions (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 8-10,13
@ -124,7 +124,7 @@ To use this command line profiler **nvprof**, you can simply issue the following
1. Add :code:`REGISTER_GPU_PROFILER` function (see the emphasize-lines).
.. literalinclude:: ../../paddle/math/tests/test_GpuProfiler.cpp
.. literalinclude:: ../../../paddle/math/tests/test_GpuProfiler.cpp
:language: c++
:lines: 111-124
:emphasize-lines: 6-7

@ -93,7 +93,7 @@ where `train.sh` is almost the same as `demo/seqToseq/translation/train.sh`, the
- `--init_model_path`: path of the initialization model, here is `data/paraphrase_model`
- `--load_missing_parameter_strategy`: what to do when the model file is missing; here a normal distribution is used to initialize the other parameters except for the embedding layer
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/text_generation.md).
For users who want to understand the dataset format, model architecture and training procedure in detail, please refer to [Text generation Tutorial](../text_generation/index_en.md).
## Optional Function ##
### Embedding Parameters Observation

@ -12,7 +12,7 @@ This tutorial will teach the basics of deep learning (DL), including how to impl
To get started, please install PaddlePaddle on your computer. Throughout this tutorial, you will learn by implementing different DL models for text classification.
To install PaddlePaddle, please follow the instructions here: <a href = "../../build/index.html" >Build and Install</a>.
To install PaddlePaddle, please follow the instructions here: <a href = "../../getstarted/build_and_install/index_en.html" >Build and Install</a>.
## Overview
For the first step, you will use PaddlePaddle to build a **text classification** system. For example, suppose you run an e-commerce website, and you want to analyze the sentiment of user reviews to evaluate product quality.
@ -156,14 +156,14 @@ define_py_data_sources2(train_list='data/train.list',
obj="process",
args={"dictionary": word_dict})
```
You can refer to the following link for more detailed examples and data formats: <a href = "../../ui/data_provider/pydataprovider2.html">PyDataProvider2</a>.
You can refer to the following link for more detailed examples and data formats: <a href = "../../api/data_provider/pydataprovider2_en.html">PyDataProvider2</a>.
## Network Architecture
You will describe four kinds of network architectures in this section.
<center> ![](./PipelineNetwork_en.jpg) </center>
First, you will build a logistic regression model. Later, you will also get a chance to build other, more powerful network architectures.
For more detailed documentation, you could refer to: <a href = "../../ui/api/trainer_config_helpers/layers_index.html">Layer documentation</a>All configuration files are in `demo/quick_start` directory.
For more detailed documentation, you could refer to: <a href = "../../api/trainer_config_helpers/layers.html">layer documentation</a>. All configuration files are in `demo/quick_start` directory.
### Logistic Regression
The architecture is illustrated in the following picture:
@ -366,7 +366,7 @@ You can use single layer LSTM model with Dropout for our text classification pro
<br>
## Optimization Algorithm
<a href = "../../ui/api/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
<a href = "../../api/trainer_config_helpers/optimizers.html">Optimization algorithms</a> include Momentum, RMSProp, AdaDelta, AdaGrad, Adam, and Adamax. You can use Adam optimization method here, with L2 regularization and gradient clipping, because Adam has been proved to work very well for training recurrent neural network.
```python
settings(batch_size=128,
@ -391,7 +391,8 @@ paddle train \
--use_gpu=false
```
If you want to install the remote training platform, which enables distributed training on clusters, follow the instructions here: <a href = "../../cluster/index.html">Platform</a> documentation. We do not provide examples on how to train on clusters. Please refer to other demos or platform training documentation for mode details on training on clusters.
We do not provide examples on how to train on clusters here. If you want to train on clusters, please follow the <a href = "../../howto/cluster/cluster_train_en.html">distributed training</a> documentation or other demos for more details.
## Inference
You can use the trained model to perform prediction on the dataset with no labels. You can also evaluate the model on dataset with labels to obtain its test accuracy.
<center> ![](./PipelineTest_en.png) </center>
@ -406,7 +407,7 @@ paddle train \
--init_model_path=./output/pass-0000x
```
We will give an example of performing prediction using Recurrent model on a dataset with no labels. You can refer to: <a href = "../../ui/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorialor other <a href = "../../demo/index.html">demo</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
We will give an example of performing prediction using the Recurrent model on a dataset with no labels. You can refer to the <a href = "../../api/predict/swig_py_paddle_en.html">Python Prediction API</a> tutorial or other <a href = "../../tutorials/index_en.html">demos</a> for the prediction process using Python. You can also use the following script for inference or evaluation.
inference script (predict.sh)
@ -508,7 +509,7 @@ The scripts of data downloading, network configurations, and training scrips are
* \--config_args: Other configuration arguments.
* \--init_model_path: The path of the initial model parameter.
By default, the trainer will save model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in <a href = "../../ui/index.html#command-line-argument">command line argument documentation</a>
By default, the trainer will save a model every pass. You can also specify `saving_period_by_batches` to set the frequency of batch saving. You can use `show_parameter_stats_period` to print the statistics of the parameters, which are very useful for tuning parameters. Other command line arguments can be found in the <a href = "../../howto/cmd_parameter/index_en.html">command line argument documentation</a>.
### Log

Some files were not shown because too many files have changed in this diff.