@@ -559,7 +559,8 @@ class FleetUtil(object):
                              hadoop_fs_name,
                              hadoop_fs_ugi,
                              hadoop_home="$HADOOP_HOME",
-                             donefile_name="sparse_cache.meta"):
+                             donefile_name="sparse_cache.meta",
+                             **kwargs):
         """
         write cache donefile

@@ -572,6 +573,9 @@ class FleetUtil(object):
             hadoop_fs_ugi(str): hdfs/afs fs ugi
             hadoop_home(str): hadoop home, default is "$HADOOP_HOME"
             donefile_name(str): donefile name, default is "sparse_cache.meta"
+            kwargs(dict): user defined properties
+                file_num(int): cache file num
+                table_id(int): cache table id

         Examples:
             .. code-block:: python
@@ -591,12 +595,14 @@ class FleetUtil(object):
         day = str(day)
         pass_id = str(pass_id)
         key_num = int(key_num)
+        file_num = kwargs.get("file_num", 16)
+        table_id = kwargs.get("table_id", 0)

         if pass_id != "-1":
-            suffix_name = "/%s/delta-%s/000_cache" % (day, pass_id)
+            suffix_name = "/%s/delta-%s/%03d_cache" % (day, pass_id, table_id)
             model_path = output_path.rstrip("/") + suffix_name
         else:
-            suffix_name = "/%s/base/000_cache" % day
+            suffix_name = "/%s/base/%03d_cache" % (day, table_id)
             model_path = output_path.rstrip("/") + suffix_name

         if fleet.worker_index() == 0:
@@ -610,8 +616,8 @@ class FleetUtil(object):
                 self.rank0_error( \
                     "not write because %s already exists" % donefile_path)
             else:
-                meta_str = \
-                    "file_prefix:part\npart_num:16\nkey_num:%d\n" % key_num
+                meta_str = "file_prefix:part\npart_num:%s\nkey_num:%d\n" \
+                           % (file_num, key_num)
                 with open(donefile_name, "w") as f:
                     f.write(meta_str)
                 client.upload(
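
The new kwargs let the donefile reflect the actual cache layout: part_num is
no longer hard-coded to 16, and the donefile is written beside the per-table
cache directory (table 1 lands under ".../delta-12/001_cache" via the %03d
formatting above). A minimal usage sketch, assuming an initialized fleet
worker and placeholder paths/credentials; the leading positional parameters
(output_path, day, pass_id, key_num) are not shown in these hunks and are
assumed from the method body:

    from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil

    fleet_util = FleetUtil()
    fleet_util.write_cache_donefile(
        "hdfs:/my/output/path/",      # output_path (placeholder)
        20190720,                     # day
        12,                           # pass_id
        123456,                       # key_num returned by save_cache_model
        hadoop_fs_name="hdfs://xxx",  # placeholder fs name
        hadoop_fs_ugi="xxx,xxx",      # placeholder ugi
        file_num=32,                  # new kwarg: actual cache file num
        table_id=1)                   # new kwarg: targets 001_cache

With these values the uploaded meta file contains
"file_prefix:part\npart_num:32\nkey_num:123456\n".
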
@@ -743,7 +749,7 @@ class FleetUtil(object):
         fleet.save_persistables(None, model_path, mode=2)
         self.rank0_print("save_xbox_base_model done")

-    def save_cache_model(self, output_path, day, pass_id, mode=1):
+    def save_cache_model(self, output_path, day, pass_id, mode=1, **kwargs):
         """
         save cache model

@@ -752,6 +758,8 @@ class FleetUtil(object):
             day(str|int): training day
             pass_id(str|int): training pass id
             mode(str|int): save mode
+            kwargs(dict): user defined properties
+                table_id(int): table id to save cache

         Returns:
             key_num(int): cache key num
@@ -767,14 +775,16 @@ class FleetUtil(object):
         day = str(day)
         pass_id = str(pass_id)
         mode = int(mode)
+        table_id = kwargs.get("table_id", 0)
         suffix_name = "/%s/delta-%s" % (day, pass_id)
         model_path = output_path.rstrip("/") + suffix_name
         self.rank0_print("going to save_cache_model %s" % model_path)
-        key_num = fleet.save_cache_model(None, model_path, mode=mode)
+        key_num = fleet.save_cache_model(
+            None, model_path, mode=mode, table_id=table_id)
         self.rank0_print("save_cache_model done")
         return key_num

-    def save_cache_base_model(self, output_path, day):
+    def save_cache_base_model(self, output_path, day, **kwargs):
         """
         save cache model

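
Because table_id is forwarded straight to fleet.save_cache_model, callers
that cache a non-default table only change the call site; omitting it keeps
the old table-0 behavior. A hedged sketch, reusing the fleet_util instance
and placeholder path from the sketch above:

    # Save pass 12's cache for table 1; returns the cache key count.
    key_num = fleet_util.save_cache_model(
        "hdfs:/my/output/path/", day=20190720, pass_id=12, mode=1,
        table_id=1)
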
@@ -782,6 +792,8 @@ class FleetUtil(object):
             output_path(str): output path
             day(str|int): training day
             pass_id(str|int): training pass id
+            kwargs(dict): user defined properties
+                table_id(int): table id to save cache

         Returns:
             key_num(int): cache key num
@@ -795,10 +807,12 @@ class FleetUtil(object):

         """
         day = str(day)
+        table_id = kwargs.get("table_id", 0)
         suffix_name = "/%s/base" % day
         model_path = output_path.rstrip("/") + suffix_name
         self.rank0_print("going to save_cache_base_model %s" % model_path)
-        key_num = fleet.save_cache_model(None, model_path, mode=2)
+        key_num = fleet.save_cache_model(
+            None, model_path, mode=2, table_id=table_id)
         self.rank0_print("save_cache_base_model done")
         return key_num

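
save_cache_base_model gets the same table_id plumbing for base models. A
hedged sketch with the same placeholder values:

    # Save table 1's base cache under <output_path>/<day>/base.
    key_num = fleet_util.save_cache_base_model(
        "hdfs:/my/output/path/", day=20190720, table_id=1)
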
@@ -845,6 +859,95 @@ class FleetUtil(object):
                 int(table.table_id), var_name_list)
         fleet._role_maker._barrier_worker()

+    def save_paddle_inference_model(self,
+                                    executor,
+                                    scope,
+                                    program,
+                                    feeded_vars,
+                                    target_vars,
+                                    output_path,
+                                    day,
+                                    pass_id,
+                                    hadoop_fs_name,
+                                    hadoop_fs_ugi,
+                                    hadoop_home="$HADOOP_HOME",
+                                    save_combine=True):
+        """
+        save paddle inference model, and upload to hdfs dnn_plugin path
+
+        Args:
+            executor(Executor): fluid Executor
+            scope(Scope): fluid Scope
+            program(Program): fluid Program
+            feeded_vars(list[Variable]): feed vars
+            target_vars(list[Variable]): fetch vars
+            output_path(str): hdfs/afs output path
+            day(str|int): training day
+            pass_id(str|int): training pass
+            hadoop_fs_name(str): hadoop fs name
+            hadoop_fs_ugi(str): hadoop fs ugi
+            hadoop_home(str): hadoop home, default is "$HADOOP_HOME"
+            save_combine(bool): whether to save in one file or separate files,
+                default is True
+
+        Examples:
+            .. code-block:: python
+
+              from paddle.fluid.incubate.fleet.utils.fleet_util import FleetUtil
+              fleet_util = FleetUtil()
+              fleet_util.save_paddle_inference_model(exe,
+                                                     join_scope,
+                                                     join_program,
+                                                     feeded_vars,
+                                                     target_vars,
+                                                     "hdfs:/my/output/path/",
+                                                     day=20190727,
+                                                     pass_id=6,
+                                                     hadoop_fs_name="xxx",
+                                                     hadoop_fs_ugi="xxx,xxx")
+        """
+        day = str(day)
+        pass_id = str(pass_id)
+        feeded_var_names = [i.name for i in feeded_vars]
+        model_name = "inference_model"
+        # pull dense before save
+        self.pull_all_dense_params(scope, program)
+        if fleet.worker_index() == 0:
+            with fluid.scope_guard(scope):
+                if save_combine:
+                    fluid.io.save_inference_model(
+                        dirname=model_name,
+                        feeded_var_names=feeded_var_names,
+                        target_vars=target_vars,
+                        executor=executor,
+                        main_program=program,
+                        params_filename="params")
+                else:
+                    fluid.io.save_inference_model(
+                        dirname=model_name,
+                        feeded_var_names=feeded_var_names,
+                        target_vars=target_vars,
+                        executor=executor,
+                        main_program=program)
+
+            configs = {
+                "fs.default.name": hadoop_fs_name,
+                "hadoop.job.ugi": hadoop_fs_ugi
+            }
+            client = HDFSClient(hadoop_home, configs)
+
+            if pass_id == "-1":
+                dest = "%s/%s/base/dnn_plugin/" % (output_path, day)
+            else:
+                dest = "%s/%s/delta-%s/dnn_plugin/" % (output_path, day,
+                                                       pass_id)
+            if not client.is_exist(dest):
+                client.makedirs(dest)
+
+            client.upload(dest, model_name)
+
+        fleet._role_maker._barrier_worker()
+
     def save_paddle_params(self,
                            executor,
                            scope,
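
Note: the table-aware pieces are meant to be called with a consistent
table_id so the donefile describes the cache that was actually saved. A
hedged end-to-end sketch (placeholder paths and credentials; file_num must
match the real shard count, and write_cache_donefile's positional order is
assumed from its body):

    key_num = fleet_util.save_cache_model(
        "hdfs:/my/output/path/", day=20190720, pass_id=12, mode=1,
        table_id=1)
    fleet_util.write_cache_donefile(
        "hdfs:/my/output/path/", 20190720, 12, key_num,
        hadoop_fs_name="hdfs://xxx", hadoop_fs_ugi="xxx,xxx",
        file_num=16, table_id=1)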