@@ -143,7 +143,8 @@ class DistributeTranspiler:
                   program=None,
                   pservers="127.0.0.1:6174",
                   trainers=1,
-                  split_method=splitter.round_robin):
+                  split_method=splitter.round_robin,
+                  sync_mode=True):
         """
         Transpile the program to distributed data-parallelism programs.
         The main_program will be transformed to use a remote parameter server
@@ -191,6 +192,7 @@ class DistributeTranspiler:
         self.origin_program = program
         self.trainer_num = trainers
         self.optimize_ops = optimize_ops
+        self.sync_mode = sync_mode
         # TODO(typhoonzero): currently trainer_id is fetched from cluster system
         # like Kubernetes, we should port this to use etcd later when developing
         # fluid distributed training with fault-tolerance.
@@ -473,7 +475,9 @@ class DistributeTranspiler:
                 "OptimizeBlock": optimize_block,
                 "endpoint": endpoint,
                 "Fanin": self.trainer_num,
-                "PrefetchBlock": prefetch_block
+                "PrefetchBlock": prefetch_block,
+                "sync_mode": self.sync_mode,
+                "grad_to_id": []
             })
 
         pserver_program.sync_with_cpp()
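Usage note: a minimal sketch of how the new sync_mode switch could be driven from user code. It assumes the transpiler is exposed as fluid.DistributeTranspiler and that the existing get_pserver_program/get_trainer_program helpers are used afterwards; the endpoints, trainer count, and trainer_id below are placeholder values, not part of this diff.

    import paddle.fluid as fluid

    # Build the usual single-process program first (network + optimizer),
    # then let the transpiler rewrite it for distributed execution.
    t = fluid.DistributeTranspiler()
    t.transpile(
        trainer_id=0,  # normally supplied by the cluster system (see TODO above)
        program=fluid.default_main_program(),
        pservers="127.0.0.1:6174,127.0.0.1:6175",
        trainers=2,
        sync_mode=False)  # new flag from this diff; False selects the non-synchronous path

    # On a parameter-server node: program whose listen_and_serv op now carries
    # the "sync_mode" attribute set above.
    pserver_prog = t.get_pserver_program("127.0.0.1:6174")
    # On a trainer node: program that sends gradients and receives parameters.
    trainer_prog = t.get_trainer_program()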