@@ -372,6 +372,7 @@ def load_vars(executor,
         load_vars(
             executor,
             dirname=dirname,
+            main_program=main_program,
             vars=list(filter(predicate, main_program.list_vars())),
             filename=filename)
     else:
@@ -403,9 +404,12 @@ def load_vars(executor,
             inputs={},
             outputs={"Out": load_var_list},
             attrs={'file_path': os.path.join(dirname, filename)})

         executor.run(load_prog)

+    # load slice vars on pserver, if any.
+    _load_slice_up_vars(executor, dirname,
+                        main_program._slice_vars_and_attrs)

 def load_params(executor, dirname, main_program=None, filename=None):
     """
@@ -659,11 +663,19 @@ def save_inference_model(dirname,

     save_persistables(executor, dirname, inference_program, params_filename)

+    # if there is a lookup table, trainer 0 will notify all pservers to save it.
+    if main_program._is_distributed and main_program._is_chief and main_program._distributed_lookup_table:
+        lookup_table_filename = os.path.join(dirname, "__lookup_table__")
+        _save_lookup_tables_by_notify(executor, lookup_table_filename,
+                                      main_program._distributed_lookup_table,
+                                      main_program._endpoints)
+

 def load_inference_model(dirname,
                          executor,
                          model_filename=None,
-                         params_filename=None):
+                         params_filename=None,
+                         pserver_endpoints=None):
     """
     Load inference model from a directory
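For the save side above: on a distributed job the chief trainer both saves the persistables and notifies every pserver to dump its lookup table shard into "__lookup_table__". A hedged usage sketch, assuming `t` is a DistributeTranspiler that has already been transpiled (so the trainer program carries the `_is_distributed`, `_is_chief` and `_distributed_lookup_table` attributes checked above) and that `img` and `predict` name real variables in it:

    import paddle.fluid as fluid

    exe = fluid.Executor(fluid.CPUPlace())
    # `t`, `img`, `predict` are assumed from the training script (hypothetical)
    trainer_prog = t.get_trainer_program()
    fluid.io.save_inference_model(
        dirname="./infer_model",
        feeded_var_names=["img"],
        target_vars=[predict],
        executor=exe,
        main_program=trainer_prog)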
@@ -679,6 +691,10 @@ def load_inference_model(dirname,
                                 parameters were saved in a single binary
                                 file. If parameters were saved in separate
                                 files, set it as 'None'.
+        pserver_endpoints(list|None): This is only needed by distributed inference.
+                                      When a distributed lookup table is used in
+                                      training, it is also needed at inference
+                                      time. The parameter is a list of pserver endpoints.

     Returns:
         tuple: The return of this function is a tuple with three elements:
@@ -697,12 +713,16 @@ def load_inference_model(dirname,

             exe = fluid.Executor(fluid.CPUPlace())
             path = "./infer_model"
+            endpoints = ["127.0.0.1:2023","127.0.0.1:2024"]
             [inference_program, feed_target_names, fetch_targets] =
                 fluid.io.load_inference_model(dirname=path, executor=exe)
             results = exe.run(inference_program,
                           feed={feed_target_names[0]: tensor_img},
                           fetch_list=fetch_targets)

+            # if we need a lookup table, we can use:
+            fluid.io.load_inference_model(dirname=path, executor=exe, pserver_endpoints=endpoints)
+
             # In this example, the inference program was saved in
             # "./infer_model/__model__" and parameters were saved in
             # separate files in "./infer_model".
@@ -729,6 +749,9 @@ def load_inference_model(dirname,
     program = Program.parse_from_string(program_desc_str)
     load_persistables(executor, dirname, program, params_filename)

+    if pserver_endpoints:
+        program = _endpoints_replacement(program, pserver_endpoints)
+
     feed_target_names = program.desc.get_feed_target_names()
     fetch_target_names = program.desc.get_fetch_target_names()
     fetch_targets = [
@@ -738,6 +761,61 @@ def load_inference_model(dirname,
     return [program, feed_target_names, fetch_targets]


+def _save_lookup_tables_by_notify(executor, dirname, lookup_table,
+                                  pserver_endpoints):
+    """
+    This function sends a checkpoint notify message from trainer 0
+    to all the pservers.
+    The checkpoint notify message contains the lookup table name and
+    the absolute path on the pserver where the lookup table will be saved.
+
+    Args:
+        executor(Executor): The executor used to send the checkpoint notify.
+        dirname(str): The folder where to save.
+        lookup_table(string): the lookup table name; when using a distributed
+            lookup table, we can get the lookup table name from
+            DistributeTranspiler.table_name.
+        pserver_endpoints(list): the parameter server ip:port list; when using
+            a distributed lookup table, we can get it from the distribute
+            arguments.
+
+    Return:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            table_name = "share_w"
+            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
+
+            _save_lookup_tables_by_notify(executor=exe,
+                    dirname=param_path, lookup_table=table_name,
+                    pserver_endpoints=ps_endpoints)
+    """
+
+    pserver_notify_program = Program()
+    pserver_notify_block = pserver_notify_program.global_block()
+
+    attrs = {}
+    attrs['epmap'] = pserver_endpoints
+    attrs['dir'] = dirname
+    attrs['lookup_table'] = lookup_table
+
+    pserver_notify_block.append_op(
+        type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
+    executor.run(pserver_notify_program)
+
+
+def _endpoints_replacement(program, endpoints):
+    ENDPOINT_MAP = "epmap"
+    for op in program.global_block().ops:
+        if op.has_attr(ENDPOINT_MAP):
+            op.set_attr(ENDPOINT_MAP, endpoints)
+    program._sync_with_cpp()
+    return program
+
+
 def get_parameter_value(para, executor):
     """
     Get the LoDTensor value of the given parameter.
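`_endpoints_replacement` above only rewrites an op attribute, so its behavior can be shown framework-free. A toy sketch of the same loop shape; `FakeOp` is a stand-in class, not a Paddle API:

    class FakeOp(object):
        # stand-in for a Paddle operator, illustrative only
        def __init__(self, attrs):
            self.attrs = attrs

        def has_attr(self, name):
            return name in self.attrs

        def set_attr(self, name, value):
            self.attrs[name] = value

    # training-time endpoints baked into the saved program
    ops = [FakeOp({"epmap": ["10.0.0.1:6000"]}), FakeOp({})]
    new_endpoints = ["127.0.0.1:2023", "127.0.0.1:2024"]

    for op in ops:
        if op.has_attr("epmap"):
            op.set_attr("epmap", new_endpoints)

    assert ops[0].attrs["epmap"] == new_endpoints
    assert "epmap" not in ops[1].attrs  # ops without epmap stay untouched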
@@ -799,3 +877,46 @@ def get_parameter_value_by_name(name, executor, program=None):
         program = default_main_program()
     var = program.global_block().var(name)
     return get_parameter_value(var, executor)
+
+
+def _load_slice_up_vars(executor, dirname, slice_vars_and_attrs):
+    if not slice_vars_and_attrs:
+        return
+
+    load_prog = Program()
+    load_block = load_prog.global_block()
+
+    for var_tuple in slice_vars_and_attrs:
+        orig_var = var_tuple[0]
+        start = var_tuple[1]
+        slice_var = var_tuple[2]
+        end = start + reduce(lambda x, y: x * y, slice_var.shape)
+
+        clone_orig_var = load_block.create_var(
+            name=orig_var.name,
+            type=orig_var.type,
+            shape=orig_var.shape,
+            dtype=orig_var.dtype,
+            persistable=True)
+
+        clone_slice_var = load_block.create_var(
+            name=slice_var.name,
+            type=slice_var.type,
+            shape=slice_var.shape,
+            dtype=slice_var.dtype,
+            persistable=True)
+
+        load_block.append_op(
+            type='load',
+            inputs={},
+            outputs={'Out': [clone_orig_var]},
+            attrs={'file_path': os.path.join(dirname, clone_orig_var.name)})
+        load_block.append_op(
+            type="slice",
+            inputs={'Input': clone_orig_var},
+            outputs={'Out': clone_slice_var},
+            attrs={'axes': [0],
+                   'starts': [start],
+                   'ends': [end]})
+
+    executor.run(load_prog)
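The offset arithmetic in `_load_slice_up_vars` can be checked with plain numpy. A sketch using a 1-D parameter, so that reduce(mul, slice_var.shape) equals the length along axis 0 that the `slice` op cuts; the numbers are illustrative, not from this diff:

    from functools import reduce
    import numpy as np

    full_var = np.arange(10.0)   # stands in for the loaded original variable
    start = 4                    # this pserver's offset into full_var
    slice_shape = (3,)           # shape of this pserver's slice (illustrative)

    end = start + reduce(lambda x, y: x * y, slice_shape)
    pserver_slice = full_var[start:end]  # what the slice op produces

    assert end == 7
    assert (pserver_slice == np.array([4.0, 5.0, 6.0])).all()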