Patch summary (text ahead of the first "diff --git" header is not part of any
hunk):
  * paddle/operators/feed_op.cc: FeedOp calls ShareDataWith on the output when
    the feed item is already on the requested place, and framework::Copy
    otherwise.
  * paddle/scripts/docker/build.sh: passes -DCUDA_ARCH_NAME to cmake, taking
    the value from $CUDA_ARCH_NAME and defaulting to "All".
  * paddle/scripts/docker/test.sh (new): starts NUM_PROC background ctest
    processes, each with its own CUDA_VISIBLE_DEVICES ordering.
Revised in review: test.sh now waits on every background PID and exits non-zero
when any ctest process failed (a bare `wait` returns 0 regardless), so the
"index" blob id recorded for test.sh below predates that revision.

diff --git a/paddle/operators/feed_op.cc b/paddle/operators/feed_op.cc
index d738e1850c..789d01e002 100644
--- a/paddle/operators/feed_op.cc
+++ b/paddle/operators/feed_op.cc
@@ -52,7 +52,11 @@ class FeedOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(place);
 
-    framework::Copy(feed_item, place, dev_ctx, out_item);
+    if (platform::is_same_place(feed_item.place(), place)) {
+      out_item->ShareDataWith(feed_item);
+    } else {
+      framework::Copy(feed_item, place, dev_ctx, out_item);
+    }
     out_item->set_lod(feed_item.lod());
   }
 };
diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh
index fbae37b2ca..8369ded8cb 100644
--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -40,6 +40,7 @@ function cmake_gen() {
         -DWITH_MKL=${WITH_MKL:-ON}
         -DWITH_AVX=${WITH_AVX:-OFF}
         -DWITH_GOLANG=${WITH_GOLANG:-ON}
+        -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All}
         -DWITH_SWIG_PY=ON
         -DWITH_C_API=${WITH_C_API:-OFF}
         -DWITH_PYTHON=${WITH_PYTHON:-ON}
@@ -62,6 +63,7 @@ EOF
         -DWITH_MKL=${WITH_MKL:-ON} \
         -DWITH_AVX=${WITH_AVX:-OFF} \
         -DWITH_GOLANG=${WITH_GOLANG:-ON} \
+        -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
         -DWITH_SWIG_PY=${WITH_SWIG_PY:-ON} \
         -DWITH_C_API=${WITH_C_API:-OFF} \
         -DWITH_PYTHON=${WITH_PYTHON:-ON} \
diff --git a/paddle/scripts/docker/test.sh b/paddle/scripts/docker/test.sh
new file mode 100755
index 0000000000..8180737a8f
--- /dev/null
+++ b/paddle/scripts/docker/test.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Runs ctest as NUM_PROC background processes; process i gets the stride-
+# NUM_PROC slice starting at i and a CUDA device list rotated by i.
+set -e
+
+# the number of processes used to run tests
+NUM_PROC=6
+
+# calculate and set the memory usage for each process
+MEM_USAGE=$(printf "%.2f" `echo "scale=5; 1.0 / $NUM_PROC" | bc`)
+export FLAGS_fraction_of_gpu_memory_to_use=$MEM_USAGE
+
+# get the CUDA device count
+CUDA_DEVICE_COUNT=$(nvidia-smi -L | wc -l)
+
+# PIDs of the background ctest processes, so each exit status can be checked
+pids=()
+
+for (( i = 0; i < $NUM_PROC; i++ )); do
+    # comma-separated device ids: (i+0)%count, (i+1)%count, ...
+    cuda_list=""
+    for (( j = 0; j < $CUDA_DEVICE_COUNT; j++ )); do
+        n=$(( (i + j) % CUDA_DEVICE_COUNT ))
+        if [ $j -eq 0 ]; then
+            cuda_list="$n"
+        else
+            cuda_list="$cuda_list,$n"
+        fi
+    done
+    echo $cuda_list
+    # CUDA_VISIBLE_DEVICES http://acceleware.com/blog/cudavisibledevices-masking-gpus
+    # ctest -I https://cmake.org/cmake/help/v3.0/manual/ctest.1.html?highlight=ctest
+    env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC --output-on-failure &
+    pids+=($!)
+done
+
+# A bare `wait` always returns 0, which would hide test failures; wait on each
+# PID individually and fail the script if any ctest process failed.
+status=0
+for pid in "${pids[@]}"; do
+    wait "$pid" || status=1
+done
+exit $status