Merge pull request #5615 from helinwang/demo

Simplify demo, add paddle.default_main_program() and paddle.default_startup_program()
mobile_baidu
helinwang 7 years ago committed by GitHub
commit 3d74615b0d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -37,6 +37,8 @@ import model
import paddle.trainer.config_parser as cp import paddle.trainer.config_parser as cp
__all__ = [ __all__ = [
'default_startup_program',
'default_main_program',
'optimizer', 'optimizer',
'layer', 'layer',
'activation', 'activation',

@ -4,7 +4,7 @@ import collections
import numpy as np import numpy as np
import copy import copy
__all__ = ['Block', 'Variable', 'Program', 'Operator'] __all__ = ['Block', 'Variable', 'Program', 'Operator', 'default_startup_program', 'default_main_program']
def unique_name(prefix): def unique_name(prefix):
@ -562,3 +562,9 @@ class Parameter(Variable):
# program is a global instance. # program is a global instance.
g_main_program = Program() g_main_program = Program()
g_startup_program = Program() g_startup_program = Program()
def default_startup_program():
return g_startup_program
def default_main_program():
return g_main_program

@ -2,45 +2,33 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.io import save_persistables, load_persistables from paddle.v2.fluid.io import save_persistables, load_persistables
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
x = layers.data( x = layers.data(
name='x', name='x',
shape=[13], shape=[13],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
y_predict = layers.fc(input=x, y_predict = layers.fc(input=x,
size=1, size=1,
act=None, act=None)
main_program=main_program,
startup_program=startup_program)
y = layers.data( y = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
cost = layers.square_error_cost( cost = layers.square_error_cost(
input=y_predict, input=y_predict,
label=y, label=y)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
BATCH_SIZE = 20 BATCH_SIZE = 20
@ -52,12 +40,12 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
save_persistables(exe, "./fit_a_line.model/", main_program=main_program) save_persistables(exe, "./fit_a_line.model/")
load_persistables(exe, "./fit_a_line.model/", main_program=main_program) load_persistables(exe, "./fit_a_line.model/")
for data in train_reader(): for data in train_reader():
x_data = np.array(map(lambda x: x[0], data)).astype("float32") x_data = np.array(map(lambda x: x[0], data)).astype("float32")
y_data = np.array(map(lambda x: x[1], data)).astype("float32") y_data = np.array(map(lambda x: x[1], data)).astype("float32")
@ -69,7 +57,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
# print tensor_y.get_dims() # print tensor_y.get_dims()
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost]) fetch_list=[avg_cost])

@ -5,19 +5,17 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.framework import g_startup_program, g_main_program import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.initializer import XavierInitializer from paddle.v2.fluid.initializer import XavierInitializer
def resnet_cifar10(input, depth=32, main_program=None, startup_program=None): def resnet_cifar10(input, depth=32):
def conv_bn_layer(input, def conv_bn_layer(input,
ch_out, ch_out,
filter_size, filter_size,
stride, stride,
padding, padding,
act='relu', act='relu'):
main_program=None,
startup_program=None):
tmp = layers.conv2d( tmp = layers.conv2d(
input=input, input=input,
filter_size=filter_size, filter_size=filter_size,
@ -25,14 +23,10 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
stride=stride, stride=stride,
padding=padding, padding=padding,
act=None, act=None,
bias_attr=False, bias_attr=False)
main_program=main_program,
startup_program=startup_program)
return layers.batch_norm( return layers.batch_norm(
input=tmp, input=tmp,
act=act, act=act)
main_program=main_program,
startup_program=startup_program)
def shortcut(input, ch_in, ch_out, stride, program, init_program): def shortcut(input, ch_in, ch_out, stride, program, init_program):
if ch_in != ch_out: if ch_in != ch_out:
@ -44,40 +38,30 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
def basicblock(input, def basicblock(input,
ch_in, ch_in,
ch_out, ch_out,
stride, stride):
main_program=main_program,
startup_program=startup_program):
tmp = conv_bn_layer( tmp = conv_bn_layer(
input, input,
ch_out, ch_out,
3, 3,
stride, stride,
1, 1)
main_program=main_program,
startup_program=startup_program)
tmp = conv_bn_layer( tmp = conv_bn_layer(
tmp, tmp,
ch_out, ch_out,
3, 3,
1, 1,
1, 1,
act=None, act=None)
main_program=main_program, short = shortcut(input, ch_in, ch_out, stride)
startup_program=startup_program)
short = shortcut(input, ch_in, ch_out, stride, main_program,
startup_program)
return layers.elementwise_add( return layers.elementwise_add(
x=tmp, x=tmp,
y=short, y=short,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
def layer_warp(block_func, input, ch_in, ch_out, count, stride, program, def layer_warp(block_func, input, ch_in, ch_out, count, stride):
startup_program): tmp = block_func(input, ch_in, ch_out, stride)
tmp = block_func(input, ch_in, ch_out, stride, program, startup_program)
for i in range(1, count): for i in range(1, count):
tmp = block_func(tmp, ch_out, ch_out, 1, program, startup_program) tmp = block_func(tmp, ch_out, ch_out, 1)
return tmp return tmp
assert (depth - 2) % 6 == 0 assert (depth - 2) % 6 == 0
@ -87,53 +71,41 @@ def resnet_cifar10(input, depth=32, main_program=None, startup_program=None):
ch_out=16, ch_out=16,
filter_size=3, filter_size=3,
stride=1, stride=1,
padding=1, padding=1)
main_program=main_program,
startup_program=startup_program)
res1 = layer_warp( res1 = layer_warp(
basicblock, basicblock,
conv1, conv1,
16, 16,
16, 16,
n, n,
1, 1)
main_program=main_program,
startup_program=startup_program)
res2 = layer_warp( res2 = layer_warp(
basicblock, basicblock,
res1, res1,
16, 16,
32, 32,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
res3 = layer_warp( res3 = layer_warp(
basicblock, basicblock,
res2, res2,
32, 32,
64, 64,
n, n,
2, 2)
main_program=main_program,
startup_program=startup_program)
pool = layers.pool2d( pool = layers.pool2d(
input=res3, input=res3,
pool_size=8, pool_size=8,
pool_type='avg', pool_type='avg',
pool_stride=1, pool_stride=1)
main_program=main_program,
startup_program=startup_program)
return pool return pool
def vgg16_bn_drop(input, main_program=None, startup_program=None): def vgg16_bn_drop(input):
def conv_block(input, def conv_block(input,
num_filter, num_filter,
groups, groups,
dropouts, dropouts):
main_program=None,
startup_program=None):
return nets.img_conv_group( return nets.img_conv_group(
input=input, input=input,
pool_size=2, pool_size=2,
@ -143,51 +115,34 @@ def vgg16_bn_drop(input, main_program=None, startup_program=None):
conv_act='relu', conv_act='relu',
conv_with_batchnorm=True, conv_with_batchnorm=True,
conv_batchnorm_drop_rate=dropouts, conv_batchnorm_drop_rate=dropouts,
pool_type='max', pool_type='max')
main_program=main_program,
startup_program=startup_program)
conv1 = conv_block(input, 64, 2, [0.3, 0], main_program, startup_program) conv1 = conv_block(input, 64, 2, [0.3, 0])
conv2 = conv_block(conv1, 128, 2, [0.4, 0], main_program, startup_program) conv2 = conv_block(conv1, 128, 2, [0.4, 0])
conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0], main_program, conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
startup_program) conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0], main_program, conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
startup_program)
conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0], main_program,
startup_program)
drop = layers.dropout( drop = layers.dropout(
x=conv5, x=conv5,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc1 = layers.fc(input=drop, fc1 = layers.fc(input=drop,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
reshape1 = layers.reshape( reshape1 = layers.reshape(
x=fc1, x=fc1,
shape=list(fc1.shape + (1, 1)), shape=list(fc1.shape + (1, 1)))
main_program=main_program,
startup_program=startup_program)
bn = layers.batch_norm( bn = layers.batch_norm(
input=reshape1, input=reshape1,
act='relu', act='relu')
main_program=main_program,
startup_program=startup_program)
drop2 = layers.dropout( drop2 = layers.dropout(
x=bn, x=bn,
dropout_prob=0.5, dropout_prob=0.5)
main_program=main_program,
startup_program=startup_program)
fc2 = layers.fc(input=drop2, fc2 = layers.fc(input=drop2,
size=512, size=512,
act=None, act=None,
param_attr={"initializer": XavierInitializer()}, param_attr={"initializer": XavierInitializer()})
main_program=main_program,
startup_program=startup_program)
return fc2 return fc2
@ -225,7 +180,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
batch_id = 0 batch_id = 0
@ -243,7 +198,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])

@ -4,65 +4,45 @@ import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.evaluator as evaluator import paddle.v2.fluid.evaluator as evaluator
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program()
main_program = Program()
images = layers.data( images = layers.data(
name='pixel', name='pixel',
shape=[1, 28, 28], shape=[1, 28, 28],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
label = layers.data( label = layers.data(
name='label', name='label',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
conv_pool_1 = nets.simple_img_conv_pool( conv_pool_1 = nets.simple_img_conv_pool(
input=images, input=images,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
conv_pool_2 = nets.simple_img_conv_pool( conv_pool_2 = nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=50,
pool_size=2, pool_size=2,
pool_stride=2, pool_stride=2,
act="relu", act="relu")
main_program=main_program,
startup_program=startup_program)
predict = layers.fc(input=conv_pool_2, predict = layers.fc(input=conv_pool_2,
size=10, size=10,
act="softmax", act="softmax")
main_program=main_program, cost = layers.cross_entropy(input=predict, label=label)
startup_program=startup_program) avg_cost = layers.mean(x=cost)
cost = layers.cross_entropy(
input=predict,
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(x=cost, main_program=main_program)
optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999) optimizer = optimizer.AdamOptimizer(learning_rate=0.01, beta1=0.9, beta2=0.999)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
accuracy, acc_out = evaluator.accuracy( accuracy, acc_out = evaluator.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
BATCH_SIZE = 50 BATCH_SIZE = 50
PASS_NUM = 3 PASS_NUM = 3
@ -74,7 +54,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program, feed={}, fetch_list=[]) exe.run(framework.default_startup_program())
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
count = 0 count = 0
@ -90,7 +70,7 @@ for pass_id in range(PASS_NUM):
tensor_img.set(img_data, place) tensor_img.set(img_data, place)
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={"pixel": tensor_img, feed={"pixel": tensor_img,
"label": tensor_y}, "label": tensor_y},
fetch_list=[avg_cost, acc_out]) fetch_list=[avg_cost, acc_out])

@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
from paddle.v2.fluid.regularizer import L2DecayRegularizer from paddle.v2.fluid.regularizer import L2DecayRegularizer
from paddle.v2.fluid.initializer import UniformInitializer from paddle.v2.fluid.initializer import UniformInitializer
@ -11,14 +10,10 @@ from paddle.v2.fluid.initializer import UniformInitializer
import numpy as np import numpy as np
BATCH_SIZE = 128 BATCH_SIZE = 128
startup_program = Program()
main_program = Program()
image = layers.data( image = layers.data(
name='x', name='x',
shape=[784], shape=[784],
data_type='float32', data_type='float32')
main_program=main_program,
startup_program=startup_program)
param_attr = { param_attr = {
'name': None, 'name': None,
@ -30,45 +25,30 @@ param_attr = {
hidden1 = layers.fc(input=image, hidden1 = layers.fc(input=image,
size=128, size=128,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
hidden2 = layers.fc(input=hidden1, hidden2 = layers.fc(input=hidden1,
size=64, size=64,
act='relu', act='relu',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
predict = layers.fc(input=hidden2, predict = layers.fc(input=hidden2,
size=10, size=10,
act='softmax', act='softmax',
main_program=main_program,
startup_program=startup_program,
param_attr=param_attr) param_attr=param_attr)
label = layers.data( label = layers.data(
name='y', name='y',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(input=predict, label=label)
input=predict, avg_cost = layers.mean(x=cost)
label=label,
main_program=main_program,
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
accuracy = layers.accuracy( accuracy = layers.accuracy(
input=predict, input=predict,
label=label, label=label)
main_program=main_program,
startup_program=startup_program)
optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9) optimizer = optimizer.MomentumOptimizer(learning_rate=0.001, momentum=0.9)
opts = optimizer.minimize(avg_cost, startup_program) opts = optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
@ -78,7 +58,7 @@ train_reader = paddle.batch(
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(startup_program) exe.run(framework.default_startup_program())
PASS_NUM = 100 PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
@ -93,7 +73,7 @@ for pass_id in range(PASS_NUM):
tensor_y = core.LoDTensor() tensor_y = core.LoDTensor()
tensor_y.set(y_data, place) tensor_y.set(y_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={'x': tensor_x, feed={'x': tensor_x,
'y': tensor_y}, 'y': tensor_y},
fetch_list=[avg_cost, accuracy]) fetch_list=[avg_cost, accuracy])

@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
@ -70,7 +69,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
@ -82,7 +81,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])

@ -3,8 +3,7 @@ import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.nets as nets import paddle.v2.fluid.nets as nets
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program, g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
@ -81,7 +80,7 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
for pass_id in xrange(PASS_NUM): for pass_id in xrange(PASS_NUM):
for data in train_data(): for data in train_data():
@ -93,7 +92,7 @@ def main():
tensor_label = core.LoDTensor() tensor_label = core.LoDTensor()
tensor_label.set(label, place) tensor_label.set(label, place)
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])

@ -2,8 +2,7 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import g_main_program, g_startup_program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
@ -88,10 +87,10 @@ def main():
place = core.CPUPlace() place = core.CPUPlace()
tensor_words, tensor_label = prepare_feed_data(data, place) tensor_words, tensor_label = prepare_feed_data(data, place)
exe = Executor(place) exe = Executor(place)
exe.run(g_startup_program) exe.run(framework.default_startup_program())
while True: while True:
outs = exe.run(g_main_program, outs = exe.run(framework.default_main_program(),
feed={"words": tensor_words, feed={"words": tensor_words,
"label": tensor_label}, "label": tensor_label},
fetch_list=[cost, acc]) fetch_list=[cost, acc])

@ -2,20 +2,17 @@ import paddle.v2 as paddle
import paddle.v2.fluid.layers as layers import paddle.v2.fluid.layers as layers
import paddle.v2.fluid.core as core import paddle.v2.fluid.core as core
import paddle.v2.fluid.optimizer as optimizer import paddle.v2.fluid.optimizer as optimizer
import paddle.v2.fluid.framework as framework
from paddle.v2.fluid.framework import Program
from paddle.v2.fluid.executor import Executor from paddle.v2.fluid.executor import Executor
import numpy as np import numpy as np
startup_program = Program() PASS_NUM = 100
main_program = Program() EMBED_SIZE = 32
HIDDEN_SIZE = 256
embed_size = 32
hidden_size = 256
N = 5 N = 5
batch_size = 32 BATCH_SIZE = 32
is_sparse = True IS_SPARSE = True
word_dict = paddle.dataset.imikolov.build_dict() word_dict = paddle.dataset.imikolov.build_dict()
dict_size = len(word_dict) dict_size = len(word_dict)
@ -23,97 +20,67 @@ dict_size = len(word_dict)
first_word = layers.data( first_word = layers.data(
name='firstw', name='firstw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
second_word = layers.data( second_word = layers.data(
name='secondw', name='secondw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
third_word = layers.data( third_word = layers.data(
name='thirdw', name='thirdw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
forth_word = layers.data( forth_word = layers.data(
name='forthw', name='forthw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
next_word = layers.data( next_word = layers.data(
name='nextw', name='nextw',
shape=[1], shape=[1],
data_type='int64', data_type='int64')
main_program=main_program,
startup_program=startup_program)
embed_first = layers.embedding( embed_first = layers.embedding(
input=first_word, input=first_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_second = layers.embedding( embed_second = layers.embedding(
input=second_word, input=second_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_third = layers.embedding( embed_third = layers.embedding(
input=third_word, input=third_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
embed_forth = layers.embedding( embed_forth = layers.embedding(
input=forth_word, input=forth_word,
size=[dict_size, embed_size], size=[dict_size, EMBED_SIZE],
data_type='float32', data_type='float32',
is_sparse=is_sparse, is_sparse=IS_SPARSE,
param_attr={'name': 'shared_w'}, param_attr={'name': 'shared_w'})
main_program=main_program,
startup_program=startup_program)
concat_embed = layers.concat( concat_embed = layers.concat(
input=[embed_first, embed_second, embed_third, embed_forth], input=[embed_first, embed_second, embed_third, embed_forth],
axis=1, axis=1)
main_program=main_program,
startup_program=startup_program)
hidden1 = layers.fc(input=concat_embed, hidden1 = layers.fc(input=concat_embed,
size=hidden_size, size=HIDDEN_SIZE,
act='sigmoid', act='sigmoid')
main_program=main_program,
startup_program=startup_program)
predict_word = layers.fc(input=hidden1, predict_word = layers.fc(input=hidden1,
size=dict_size, size=dict_size,
act='softmax', act='softmax')
main_program=main_program,
startup_program=startup_program)
cost = layers.cross_entropy( cost = layers.cross_entropy(
input=predict_word, input=predict_word,
label=next_word, label=next_word)
main_program=main_program, avg_cost = layers.mean(x=cost)
startup_program=startup_program)
avg_cost = layers.mean(
x=cost, main_program=main_program, startup_program=startup_program)
sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001) sgd_optimizer = optimizer.SGDOptimizer(learning_rate=0.001)
opts = sgd_optimizer.minimize(avg_cost, startup_program) opts = sgd_optimizer.minimize(avg_cost)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.imikolov.train(word_dict, N), batch_size) paddle.dataset.imikolov.train(word_dict, N), BATCH_SIZE)
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = Executor(place)
@ -122,8 +89,8 @@ exe = Executor(place)
# below exit line. # below exit line.
exit(0) exit(0)
exe.run(startup_program) exe.run(framework.default_startup_program())
PASS_NUM = 100
for pass_id in range(PASS_NUM): for pass_id in range(PASS_NUM):
for data in train_reader(): for data in train_reader():
input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)] input_data = [[data_idx[idx] for data_idx in data] for idx in xrange(5)]
@ -150,7 +117,7 @@ for pass_id in range(PASS_NUM):
next_tensor = core.LoDTensor() next_tensor = core.LoDTensor()
next_tensor.set(next_data, place) next_tensor.set(next_data, place)
outs = exe.run(main_program, outs = exe.run(framework.default_main_program(),
feed={ feed={
'firstw': first_tensor, 'firstw': first_tensor,
'secondw': second_tensor, 'secondw': second_tensor,

Loading…
Cancel
Save