Reverse-iterate over op's inputs

test=develop
devel
minqiyang 6 years ago
parent 1a55f7d38c
commit cce766d710

@ -81,10 +81,6 @@ class TensorAddToFunctor : public boost::static_visitor<> {
} // namespace detail
template <int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<float, MajorType, IndexType>;
void AddTo(Variable* src, Variable* dst, platform::Place place) {
framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@ -99,18 +95,10 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
"dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
src_tensor->numel());
auto result = EigenVector<>::Flatten(*dst_tensor);
auto in_0_e = EigenVector<>::Flatten(*dst_tensor);
auto in_1_e = EigenVector<>::Flatten(*src_tensor);
platform::DeviceContext* dev_ctx =
platform::DeviceContextPool::Instance().Get(place);
platform::CPUDeviceContext* x =
reinterpret_cast<platform::CPUDeviceContext*>(dev_ctx);
result.device(*x->eigen_device()) = in_0_e + in_1_e;
// detail::TensorAddToFunctor<float> func(
// src_tensor->numel(), src_tensor->data<float>(),
// dst_tensor->mutable_data<float>(place));
// boost::apply_visitor(func, place);
detail::TensorAddToFunctor<float> func(
src_tensor->numel(), src_tensor->data<float>(),
dst_tensor->mutable_data<float>(place));
boost::apply_visitor(func, place);
}
class Autograd {
@ -134,7 +122,7 @@ class Autograd {
std::map<std::string, std::vector<VarBase*>> input_grads =
ready_op->ApplyGrad();
for (auto it : input_grads) {
for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
const std::vector<VarBase*>& ingrads = it.second;
for (int64_t i = ingrads.size() - 1; i >= 0; --i) {
if (!ingrads[i]) continue;

@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
:code:`FLAGS_selected_gpus=0,1,2`, the returned list would
be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
If :code:`FLAGS_selected_gpus` is not set, all visible
gpu places would be returned.
If :code:`device_ids` is not None, it should be the device
ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
the returned list would be
[fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
Args:
device_ids (None|list(int)|tuple(int)): gpu device id list.
Returns:
@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
def cpu_places(device_count=None):
'''
Create a list of :code:`fluid.CPUPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
Create a list of :code:`fluid.CUDAPinnedPlace` objects.
If :code:`device_count` is None, the device count would
be determined by environment variable :code:`CPU_NUM`.
If :code:`CPU_NUM` is not set, the device count would
be determined by :code:`multiprocessing.cpu_count()`.
Args:
device_count (None|int): device number.
@ -2716,6 +2716,11 @@ class Program(object):
# whether the program is optimized by memory_optimize_transpiler
self.__is_mem_optimized = False
# if this program has been optimized by distributed optimizer
# fleet_opt will be given a value
self._fleet_opt = None
self._program_config = None
@property
def _is_mem_optimized(self):
# if the program is optimized, operator input/outputs

Loading…
Cancel
Save