Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into trt_dy_lib

test=develop
revert-13787-revert-13637-optimize-opyreader
nhzlx 7 years ago
commit f3af90d121

@ -72,7 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_FAST_MATH "Make use of fast math library" OFF)
option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
# PY_VERSION
if(NOT PY_VERSION)

@ -178,6 +178,7 @@ paddle.fluid.layers.batch ArgSpec(args=['reader', 'batch_size'], varargs=None, k
paddle.fluid.layers.double_buffer ArgSpec(args=['reader', 'place', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.random_data_generator ArgSpec(args=['low', 'high', 'shapes', 'lod_levels', 'for_parallel'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.layers.py_reader ArgSpec(args=['capacity', 'shapes', 'dtypes', 'lod_levels', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.layers.create_py_reader_by_data ArgSpec(args=['capacity', 'feed_list', 'name', 'use_double_buffer'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.layers.Preprocessor.__init__ ArgSpec(args=['self', 'reader', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Preprocessor.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.Preprocessor.inputs ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)

@ -12,6 +12,5 @@ endif(NOT WIN32)
if(WITH_INFERENCE)
# NOTE: please add subdirectory inference at last.
add_subdirectory(inference)
add_subdirectory(train)
endif()
add_subdirectory(train)

@ -27,9 +27,6 @@ void SetConfig(AnalysisConfig *cfg) {
cfg->device = 0;
cfg->enable_ir_optim = true;
cfg->specify_input_name = true;
#ifdef PADDLE_WITH_MKLDNN
cfg->_use_mkldnn = true;
#endif
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

@ -22,6 +22,7 @@
#include <cub/cub.cuh> // NOLINT
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle {
namespace operators {
@ -293,7 +294,12 @@ void TensorReduce(const framework::Tensor& x, framework::Tensor* y,
}
auto x_data = x.data<Tx>();
auto y_data = y->mutable_data<Ty>(x.place());
if (reduce_num == 1) return;
if (reduce_num == 1) {
auto out_dims = y->dims();
framework::TensorCopy(x, y->place(), y);
y->Resize(out_dims);
return;
}
#define CUB_BLOCK_DIM_CASE(block_dim) \
case block_dim: { \

@ -600,7 +600,7 @@ EOF
if [[ ${WITH_GPU} == "ON" ]]; then
NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} || true"
else
NCCL_DEPS=""
NCCL_DEPS="true"
fi
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]]; then

File diff suppressed because it is too large Load Diff

@ -53,13 +53,22 @@ def simple_fc_net(in_size,
hidden_sizes,
batch_size,
queue_capacity,
use_double_buffer=False):
reader = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1, in_size], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
use_double_buffer=False)
use_double_buffer=False,
use_feed_list=True):
if use_feed_list:
data = fluid.layers.data(name="data", dtype='float32', shape=[in_size])
label = fluid.layers.data(name='label', dtype='int64', shape=[1])
reader = fluid.layers.create_py_reader_by_data(
capacity=queue_capacity,
use_double_buffer=False,
feed_list=[data, label])
else:
reader = fluid.layers.py_reader(
capacity=queue_capacity,
shapes=[[-1, in_size], [-1, 1]],
lod_levels=[0, 0],
dtypes=['float32', 'int64'],
use_double_buffer=False)
feed_queue = reader.queue
reader = fluid.layers.batch(reader, batch_size=batch_size)
if use_double_buffer:
@ -100,14 +109,16 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
if core.is_compiled_with_cuda() else [False]):
for use_parallel_executor in [False, True]:
for use_double_buffer in [False, True]:
print('Test Parameters:'),
print({
'use_cuda': use_cuda,
'use_parallel_executor': use_parallel_executor,
'use_double_buffer': use_double_buffer
})
self.main(use_cuda, use_parallel_executor,
use_double_buffer)
for use_feed_list in [False, True]:
print('Test Parameters:'),
print({
'use_cuda': use_cuda,
'use_parallel_executor': use_parallel_executor,
'use_double_buffer': use_double_buffer,
'use_feed_list': use_feed_list
})
self.main(use_cuda, use_parallel_executor,
use_double_buffer, use_feed_list)
def random_reader(self):
def reader():
@ -143,12 +154,14 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
def main(self,
use_cuda=True,
use_parallel_executor=False,
use_double_buffer=False):
use_double_buffer=False,
use_feed_list=False):
assert not use_cuda or use_cuda and core.is_compiled_with_cuda()
self.use_cuda = use_cuda
self.use_parallel_executor = use_parallel_executor
self.use_double_buffer = use_double_buffer
self.use_feed_list = use_feed_list
startup_program = fluid.Program()
main_program = fluid.Program()
@ -160,7 +173,8 @@ class TestPyReaderUsingExecutor(unittest.TestCase):
hidden_sizes=self.hidden_sizes,
batch_size=self.batch_size,
queue_capacity=self.queue_capacity,
use_double_buffer=self.use_double_buffer)
use_double_buffer=self.use_double_buffer,
use_feed_list=self.use_feed_list)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

@ -243,5 +243,87 @@ class TestKeepDimReduceSumMultiAxises(OpTest):
self.check_grad(['X'], 'Out')
class TestReduceSumWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1, 2], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceSumWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].sum(axis=tuple(self.attrs['dim']),
keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithDimOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((10, 1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': False}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=False)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceMeanWithNumelOne(OpTest):
def setUp(self):
self.op_type = "reduce_mean"
self.inputs = {'X': np.random.random((1, 1)).astype("float64")}
self.attrs = {'dim': [1], 'keep_dim': True}
self.outputs = {
'Out': self.inputs['X'].mean(
axis=tuple(self.attrs['dim']), keepdims=True)
}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
class TestReduceAll(OpTest):
def setUp(self):
self.op_type = "reduce_sum"
self.inputs = {'X': np.random.random((1, 1, 1)).astype("float64")}
self.attrs = {'reduce_all': True, 'keep_dim': False}
self.outputs = {'Out': self.inputs['X'].sum()}
def test_check_output(self):
self.check_output()
def test_check_grad(self):
self.check_grad(['X'], 'Out')
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save