Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into doc_fix

wangkuiyi-patch-1
weixing02 7 years ago
commit 44c64a641d

@ -44,8 +44,8 @@ import transpiler
from param_attr import ParamAttr, WeightNormParamAttr from param_attr import ParamAttr, WeightNormParamAttr
from data_feeder import DataFeeder from data_feeder import DataFeeder
from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace
from transpiler import DistributeTranspiler, SimpleDistributeTranspiler, \ from transpiler import DistributeTranspiler, InferenceTranspiler, \
InferenceTranspiler, memory_optimize, release_memory memory_optimize, release_memory
from concurrency import (Go, make_channel, channel_send, channel_recv, from concurrency import (Go, make_channel, channel_send, channel_recv,
channel_close, Select) channel_close, Select)
from lod_tensor import create_lod_tensor, create_random_int_lodtensor from lod_tensor import create_lod_tensor, create_random_int_lodtensor

@ -12,40 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import unittest
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
from paddle.fluid.transpiler.distribute_transpiler import delete_ops from paddle.fluid.transpiler.distribute_transpiler import delete_ops
import numpy
from transpiler_test import TranspilerTest
class TestDistTranspiler(unittest.TestCase): class TestDistTranspiler(TranspilerTest):
def setUp(self): def setUp(self):
self.trainer_id = 0
self.trainers = 2
self.pservers = 2
self.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"
self.current_pserver_ep = "127.0.0.1:6174" self.current_pserver_ep = "127.0.0.1:6174"
def net_conf(self):
x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
y_predict = fluid.layers.fc(input=x,
size=1000,
act=None,
param_attr=fluid.ParamAttr(name='fc_w'))
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
return optimize_ops, params_grads
def test_transpiler(self): def test_transpiler(self):
trainer = self.get_trainer() trainer = self.get_trainer()
pserver, startup = self.get_pserver(self.current_pserver_ep) pserver, startup = self.get_pserver(self.current_pserver_ep)
@ -70,14 +46,6 @@ class TestDistTranspiler(unittest.TestCase):
fc_w_var = startup.global_block().var("fc_w.block1") fc_w_var = startup.global_block().var("fc_w.block1")
self.assertEqual(fc_w_var.shape, (500, 1000)) self.assertEqual(fc_w_var.shape, (500, 1000))
def get_main_program(self):
main = fluid.Program()
with fluid.program_guard(main):
self.net_conf()
return main
def get_expect_trainer_ops(self): def get_expect_trainer_ops(self):
trainer = fluid.Program() trainer = fluid.Program()
@ -92,25 +60,6 @@ class TestDistTranspiler(unittest.TestCase):
ops.insert(ops.index("elementwise_add_grad") + 1, "send_vars") ops.insert(ops.index("elementwise_add_grad") + 1, "send_vars")
return ops return ops
def get_trainer(self):
return self._transpiler_instance().get_trainer_program()
def get_pserver(self, ep):
t = self._transpiler_instance()
pserver = t.get_pserver_program(ep)
startup = t.get_startup_program(ep, pserver)
return pserver, startup
def _transpiler_instance(self):
main = self.get_main_program()
t = fluid.DistributeTranspiler()
t.transpile(
self.trainer_id,
program=main,
pservers=self.pserver_eps,
trainers=self.trainers)
return t
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

@ -0,0 +1,80 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.transpiler.distribute_transpiler import delete_ops
from transpiler_test import TranspilerTest
class TestSimpleDistTranspiler(TranspilerTest):
    """Checks DistributeTranspiler output when ``slice_var_up`` is False."""

    def setUp(self):
        # Endpoint whose pserver and startup programs the test inspects.
        self.current_pserver_ep = "127.0.0.1:6175"

    def test_simple_transpiler(self):
        """Compare trainer, pserver and startup programs with expectations."""
        # Fixed seed; presumably keeps any numpy-based shuffling performed
        # during transpilation reproducible -- confirm against the transpiler.
        np.random.seed(1)

        trainer_prog = self.get_trainer()
        pserver_prog, startup_prog = self.get_pserver(self.current_pserver_ep)

        trainer_op_types = [op.type for op in trainer_prog.global_block().ops]
        self.assertEqual(trainer_op_types, self.get_expect_trainer_ops())

        self.assertEqual(len(pserver_prog.blocks), 2)

        # Block 0 only hosts the listen_and_serv op.
        listen_block_ops = [op.type for op in pserver_prog.blocks[0].ops]
        self.assertEqual(listen_block_ops, ["listen_and_serv"])

        # Block 1 is the optimize pass.
        optimize_block_ops = [op.type for op in pserver_prog.blocks[1].ops]
        self.assertEqual(optimize_block_ops, ["sum", "scale", "sgd"])

        # The startup program must contain exactly these initializer ops.
        startup_op_types = [op.type for op in startup_prog.global_block().ops]
        self.assertEqual(
            startup_op_types,
            ["fill_constant", "uniform_random", "uniform_random"])

        # fc_w must NOT be split: both gradient vars keep the full shape.
        startup_block = startup_prog.global_block()
        merged_grad = startup_block.var("fc_w@GRAD")
        self.assertEqual(merged_grad.shape, (1000, 1000))

        trainer_grad = startup_block.var("fc_w@GRAD.trainer_0")
        self.assertEqual(trainer_grad.shape, (1000, 1000))

    def get_expect_trainer_ops(self):
        """Return the op-type sequence expected in the trainer program."""
        reference = fluid.Program()
        with fluid.program_guard(reference):
            optimize_ops, _ = self.net_conf()
        delete_ops(reference.global_block(), optimize_ops)

        # Ops left after removing the optimize ops, followed by the
        # communication ops expected at the end of the trainer program.
        expected = [op.type for op in reference.global_block().ops]
        expected.extend(
            ["send_vars", "send_barrier", "recv", "recv", "fetch_barrier"])

        # One extra send_vars is expected right after elementwise_add_grad.
        grad_pos = expected.index("elementwise_add_grad")
        expected.insert(grad_pos + 1, "send_vars")
        return expected

    def _transpiler_instance(self):
        """Return a transpiler run on the test net with slicing disabled."""
        main_program = self.get_main_program()
        transpiler = fluid.DistributeTranspiler()
        transpiler.transpile(
            self.trainer_id,
            program=main_program,
            pservers=self.pserver_eps,
            trainers=self.trainers,
            slice_var_up=False)
        return transpiler
if __name__ == "__main__":
    # The import block above this guard does not bring in unittest, so import
    # it here; otherwise running the file as a script raises NameError.
    import unittest

    unittest.main()

@ -14,14 +14,14 @@
import math import math
import unittest import unittest
from paddle.fluid.transpiler.distribute_transpiler import split_variable from paddle.fluid.transpiler.distribute_transpiler import slice_variable
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import random import random
class TestSplitVar(unittest.TestCase): class TestSliceVar(unittest.TestCase):
def check_split_output(self, shapes, expected_sizes, min_size): def check_slice_output(self, shapes, expected_sizes, min_size):
var_list = [] var_list = []
program = fluid.Program() program = fluid.Program()
for shape in shapes: for shape in shapes:
@ -31,7 +31,7 @@ class TestSplitVar(unittest.TestCase):
# dtype=core.VarDesc.VarType.LOD_TENSOR, # dtype=core.VarDesc.VarType.LOD_TENSOR,
shape=shape) shape=shape)
var_list.append(var) var_list.append(var)
blocks = split_variable(var_list, 10, min_size) blocks = slice_variable(var_list, 10, min_size)
all_sizes = [] all_sizes = []
for s in expected_sizes: for s in expected_sizes:
for s2 in s: for s2 in s:
@ -49,7 +49,7 @@ class TestSplitVar(unittest.TestCase):
[1150, 1150, 1150, 1150, 1150, 1150, 1100] [1150, 1150, 1150, 1150, 1150, 1150, 1100]
] ]
self.check_split_output(shapes, expected_sizes, 1024) self.check_slice_output(shapes, expected_sizes, 1024)
def test_check_output_8k(self): def test_check_output_8k(self):
shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10], shapes = [[3, 5], [1024], [28, 784], [8, 1020], [800, 10],
@ -57,7 +57,7 @@ class TestSplitVar(unittest.TestCase):
expected_sizes = [[15], [1024], [10976, 10976], [8160], [8000], expected_sizes = [[15], [1024], [10976, 10976], [8160], [8000],
[35937, 35937, 35937, 35937, 35937, 35937]] [35937, 35937, 35937, 35937, 35937, 35937]]
self.check_split_output(shapes, expected_sizes, 8192) self.check_slice_output(shapes, expected_sizes, 8192)
if __name__ == '__main__': if __name__ == '__main__':

@ -0,0 +1,73 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.layers as layers
class TranspilerTest(unittest.TestCase):
    """Shared base class for DistributeTranspiler unit tests.

    Provides the cluster settings, a small fully-connected regression net and
    helpers that run the transpiler and return the resulting programs.
    Subclasses may override ``_transpiler_instance`` to pass different
    arguments to ``transpile``.
    """

    @classmethod
    def setUpClass(cls):
        # Class-level settings, so the receiver is named ``cls``.
        cls.trainer_id = 0
        cls.trainers = 2
        cls.pservers = 2
        cls.pserver_eps = "127.0.0.1:6174,127.0.0.1:6175"

    def net_conf(self):
        """Build the test network inside the current program guard.

        Returns:
            tuple: ``(optimize_ops, params_grads)`` as returned by
            ``SGD.minimize`` on the mean squared-error cost.
        """
        x = fluid.layers.data(name='x', shape=[1000], dtype='float32')
        # The weight is named explicitly so tests can look it up as 'fc_w'.
        y_predict = fluid.layers.fc(input=x,
                                    size=1000,
                                    act=None,
                                    param_attr=fluid.ParamAttr(name='fc_w'))
        y = fluid.layers.data(name='y', shape=[1], dtype='float32')
        cost = fluid.layers.square_error_cost(input=y_predict, label=y)
        avg_cost = fluid.layers.mean(cost)
        sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.1)
        optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)
        return optimize_ops, params_grads

    def get_main_program(self):
        """Return a fresh Program populated with the test network."""
        main = fluid.Program()
        with fluid.program_guard(main):
            self.net_conf()
        return main

    def get_trainer(self):
        """Return the trainer program produced by a new transpiler run."""
        return self._transpiler_instance().get_trainer_program()

    def get_pserver(self, ep):
        """Return ``(pserver_program, startup_program)`` for endpoint ``ep``."""
        t = self._transpiler_instance()
        pserver = t.get_pserver_program(ep)
        startup = t.get_startup_program(ep, pserver)
        return pserver, startup

    def _transpiler_instance(self):
        """Transpile a fresh main program and return the transpiler."""
        main = self.get_main_program()
        t = fluid.DistributeTranspiler()
        t.transpile(
            self.trainer_id,
            program=main,
            pservers=self.pserver_eps,
            trainers=self.trainers)
        return t

@ -15,10 +15,9 @@
from distribute_transpiler import DistributeTranspiler from distribute_transpiler import DistributeTranspiler
from inference_transpiler import InferenceTranspiler from inference_transpiler import InferenceTranspiler
from memory_optimization_transpiler import memory_optimize, release_memory from memory_optimization_transpiler import memory_optimize, release_memory
from distribute_transpiler_simple import SimpleDistributeTranspiler
from ps_dispatcher import HashName, RoundRobin from ps_dispatcher import HashName, RoundRobin
__all__ = [ __all__ = [
"DistributeTranspiler", "InferenceTranspiler", "SimpleDistributeTranspiler", "DistributeTranspiler", "InferenceTranspiler", "memory_optimize",
"memory_optimize", "release_memory", "HashName", "RoundRobin" "release_memory", "HashName", "RoundRobin"
] ]

@ -39,6 +39,7 @@ Steps to transpile pserver:
from __future__ import print_function from __future__ import print_function
import math import math
import numpy as np
from ps_dispatcher import RoundRobin, HashName, PSDispatcher from ps_dispatcher import RoundRobin, HashName, PSDispatcher
from .. import core, framework from .. import core, framework
@ -70,7 +71,7 @@ def same_or_split_var(p_name, var_name):
return p_name == var_name or p_name.startswith(var_name + ".block") return p_name == var_name or p_name.startswith(var_name + ".block")
def split_variable(var_list, service_count, min_block_size=8192): def slice_variable(var_list, slice_count, min_block_size=8192):
""" """
We may need to split dense tensor to one or more blocks and put We may need to split dense tensor to one or more blocks and put
them equally onto parameter server. One block is a sub-tensor them equally onto parameter server. One block is a sub-tensor
@ -82,8 +83,8 @@ def split_variable(var_list, service_count, min_block_size=8192):
Args: Args:
var_list (list): List of variables. var_list (list): List of variables.
service_count (int): Numel of pserver services. A pserver may have two slice_count (int): Numel of count that variables will be sliced, which
or more listening ports. could be the pserver services' count.
min_block_size (int): Minimum splitted block size. min_block_size (int): Minimum splitted block size.
Returns: Returns:
blocks (list[(varname, block_id, current_block_size)]): A list blocks (list[(varname, block_id, current_block_size)]): A list
@ -91,12 +92,12 @@ def split_variable(var_list, service_count, min_block_size=8192):
""" """
blocks = [] blocks = []
for var in var_list: for var in var_list:
split_count = service_count split_count = slice_count
var_numel = reduce(lambda x, y: x * y, var.shape) var_numel = reduce(lambda x, y: x * y, var.shape)
max_pserver_count = int(math.floor(var_numel / float(min_block_size))) max_pserver_count = int(math.floor(var_numel / float(min_block_size)))
if max_pserver_count == 0: if max_pserver_count == 0:
max_pserver_count = 1 max_pserver_count = 1
if max_pserver_count < service_count: if max_pserver_count < slice_count:
split_count = max_pserver_count split_count = max_pserver_count
block_size = int(math.ceil(var_numel / float(split_count))) block_size = int(math.ceil(var_numel / float(split_count)))
@ -177,7 +178,7 @@ class DistributeTranspiler:
for index in range(len(self.pserver_endpoints)) for index in range(len(self.pserver_endpoints))
] ]
def _init_splited_vars(self, split_method): def _init_splited_vars(self, slice_var_up):
# update these mappings for further transpile: # update these mappings for further transpile:
# 1. param_var_mapping: param var name -> [splited params vars] # 1. param_var_mapping: param var name -> [splited params vars]
# 2. grad_var_mapping: grad var name -> [splited grads vars] # 2. grad_var_mapping: grad var name -> [splited grads vars]
@ -196,9 +197,19 @@ class DistributeTranspiler:
self._update_dist_lookup_table_vars(param_list, grad_list, self._update_dist_lookup_table_vars(param_list, grad_list,
self.params_grads) self.params_grads)
grad_blocks = split_variable(grad_list, len(self.pserver_endpoints)) if slice_var_up:
param_blocks = split_variable(param_list, len(self.pserver_endpoints)) # when we slice var up into blocks, we will slice the var according to
# pserver services' count. A pserver may have two or more listening ports.
grad_blocks = slice_variable(grad_list, len(self.pserver_endpoints))
param_blocks = slice_variable(param_list,
len(self.pserver_endpoints))
else:
# when we do NOT slice var up into blocks, we will always slice params
# grads into one block.
grad_blocks = slice_variable(grad_list, 1)
param_blocks = slice_variable(param_list, 1)
assert (len(grad_blocks) == len(param_blocks)) assert (len(grad_blocks) == len(param_blocks))
# origin_varname -> [splited_var] # origin_varname -> [splited_var]
self.param_var_mapping = self._create_vars_from_blocklist( self.param_var_mapping = self._create_vars_from_blocklist(
self.origin_program, param_blocks) self.origin_program, param_blocks)
@ -229,6 +240,7 @@ class DistributeTranspiler:
program=None, program=None,
pservers="127.0.0.1:6174", pservers="127.0.0.1:6174",
trainers=1, trainers=1,
slice_var_up=True,
split_method=RoundRobin, split_method=RoundRobin,
sync_mode=True): sync_mode=True):
""" """
@ -262,13 +274,27 @@ class DistributeTranspiler:
self.has_distributed_lookup_table = self._has_distributed_lookup_table() self.has_distributed_lookup_table = self._has_distributed_lookup_table()
# split and create vars, then put splited vars in dicts for later use. # split and create vars, then put splited vars in dicts for later use.
self._init_splited_vars(split_method) self._init_splited_vars(slice_var_up)
# step 3.1: insert send op to send gradient vars to parameter servers # step 3.1: insert send op to send gradient vars to parameter servers
ps_dispatcher.reset() ps_dispatcher.reset()
send_vars = [] send_vars = []
for orig_varname, splited_vars in self.grad_var_mapping.items():
# In general the number of pservers is a multiple of 2, which leads to an
# uneven distribution of weights and biases across the pservers:
# fc_w@GRAD_trainer_0, fc_w@GRAD_trainer_1 --> pserver1
# fc_b@GRAD_trainer_0, fc_b@GRAD_trainer_1 --> pserver2
# Shuffling the mapping avoids the uneven distribution above.
grad_var_mapping_items = self.grad_var_mapping.items()
if not slice_var_up:
np.random.shuffle(grad_var_mapping_items)
for orig_varname, splited_vars in grad_var_mapping_items:
eplist = ps_dispatcher.dispatch(splited_vars) eplist = ps_dispatcher.dispatch(splited_vars)
if not slice_var_up:
assert (len(splited_vars) == 1)
if len(splited_vars) == 1: if len(splited_vars) == 1:
orig_varname = splited_vars[0].name orig_varname = splited_vars[0].name
index = find_op_by_output_arg(program.global_block(), index = find_op_by_output_arg(program.global_block(),
@ -316,6 +342,7 @@ class DistributeTranspiler:
for i, ep in enumerate(eplist): for i, ep in enumerate(eplist):
self.param_grad_ep_mapping[ep]["params"].append(recv_vars[i]) self.param_grad_ep_mapping[ep]["params"].append(recv_vars[i])
self.param_grad_ep_mapping[ep]["grads"].append(send_vars[i]) self.param_grad_ep_mapping[ep]["grads"].append(send_vars[i])
# step4: Concat the parameters splits together after recv. # step4: Concat the parameters splits together after recv.
for varname, splited_var in self.param_var_mapping.iteritems(): for varname, splited_var in self.param_var_mapping.iteritems():
eps = [] eps = []

Loading…
Cancel
Save