Reverse-iterate over op's inputs

test=develop
devel
minqiyang 6 years ago
parent 1a55f7d38c
commit cce766d710

@ -81,10 +81,6 @@ class TensorAddToFunctor : public boost::static_visitor<> {
} // namespace detail
template <int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<float, MajorType, IndexType>;
void AddTo(Variable* src, Variable* dst, platform::Place place) {
framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@ -99,18 +95,10 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
"dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
src_tensor->numel());
auto result = EigenVector<>::Flatten(*dst_tensor);
auto in_0_e = EigenVector<>::Flatten(*dst_tensor);
auto in_1_e = EigenVector<>::Flatten(*src_tensor);
platform::DeviceContext* dev_ctx =
platform::DeviceContextPool::Instance().Get(place);
platform::CPUDeviceContext* x =
reinterpret_cast<platform::CPUDeviceContext*>(dev_ctx);
result.device(*x->eigen_device()) = in_0_e + in_1_e;
// detail::TensorAddToFunctor<float> func(
// src_tensor->numel(), src_tensor->data<float>(),
// dst_tensor->mutable_data<float>(place));
// boost::apply_visitor(func, place);
detail::TensorAddToFunctor<float> func(
src_tensor->numel(), src_tensor->data<float>(),
dst_tensor->mutable_data<float>(place));
boost::apply_visitor(func, place);
}
class Autograd {
@ -134,7 +122,7 @@ class Autograd {
std::map<std::string, std::vector<VarBase*>> input_grads =
ready_op->ApplyGrad();
for (auto it : input_grads) {
for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
const std::vector<VarBase*>& ingrads = it.second;
for (int64_t i = ingrads.size() - 1; i >= 0; --i) {
if (!ingrads[i]) continue;

@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
:code:`FLAGS_selected_gpus=0,1,2`, the returned list would
be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
If :code:`FLAGS_selected_gpus` is not set, all visible
gpu places would be returned.
If :code:`device_ids` is not None, it should be the device
ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
the returned list would be
[fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
Args:
device_ids (None|list(int)|tuple(int)): gpu device id list.
Returns:
@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
def cpu_places(device_count=None):
'''
Create a list of :code:`fluid.CPUPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
Create a list of :code:`fluid.CUDAPinnedPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
@ -2716,6 +2716,11 @@ class Program(object):
# whether the program is optimized by memory_optimize_transpiler
self.__is_mem_optimized = False
# if this program has been optimized by distributed optimizer
# fleet_opt will be given a value
self._fleet_opt = None
self._program_config = None
@property
def _is_mem_optimized(self):
# if the program is optimized, operator input/outputs

Loading…
Cancel
Save