Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix-3923-r
commit 9a3f50d806
@@ -0,0 +1,51 @@
#!/bin/bash
set -e

function train() {
  unset OMP_NUM_THREADS MKL_NUM_THREADS
  export OMP_DYNAMIC="FALSE"
  export KMP_AFFINITY="granularity=fine,compact,0,0"
  topology=$1
  bs=$2
  use_mkldnn=$3
  if [ "$3" == "True" ]; then
    # MKL-DNN manages its own threads, so a single trainer is enough.
    thread=1
    log="logs/${topology}-mkldnn-${bs}.log"
  elif [ "$3" == "False" ]; then
    thread=$(nproc)
    # each trainer_count uses only 1 core to avoid conflicts
    export OMP_NUM_THREADS=1
    export MKL_NUM_THREADS=1
    log="logs/${topology}-${thread}mklml-${bs}.log"
  else
    echo "Wrong input $3, use True or False."
    exit 1
  fi
  args="batch_size=${bs}"
  config="${topology}.py"
  paddle train --job=time \
    --config=$config \
    --use_mkldnn=$use_mkldnn \
    --use_gpu=False \
    --trainer_count=$thread \
    --log_period=10 \
    --test_period=100 \
    --config_args=$args \
    2>&1 | tee ${log}
}

# train.list is a plain file, so test with -f rather than -d.
if [ ! -f "train.list" ]; then
  echo " " > train.list
fi
if [ ! -d "logs" ]; then
  mkdir logs
fi

#========== mkldnn ==========#
train vgg 64 True
train vgg 128 True
train vgg 256 True

#========== mklml ===========#
train vgg 64 False
train vgg 128 False
train vgg 256 False
@@ -0,0 +1,103 @@
#!/usr/bin/env python
from paddle.trainer_config_helpers import *

height = 224
width = 224
num_class = 1000
batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg('layer_num', int, 19)

args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
define_py_data_sources2(
    "train.list", None, module="provider", obj="process", args=args)

settings(
    batch_size=batch_size,
    learning_rate=0.01 / batch_size,
    learning_method=MomentumOptimizer(0.9),
    regularization=L2Regularization(0.0005 * batch_size))

img = data_layer(name='image', size=height * width * 3)


def vgg_network(vgg_num=3):
    # Block 1: two 64-filter 3x3 convolutions followed by 2x2 max pooling.
    tmp = img_conv_group(
        input=img,
        num_channels=3,
        conv_padding=1,
        conv_num_filter=[64, 64],
        conv_filter_size=3,
        conv_act=ReluActivation(),
        pool_size=2,
        pool_stride=2,
        pool_type=MaxPooling())

    # Block 2: two 128-filter convolutions.
    tmp = img_conv_group(
        input=tmp,
        conv_num_filter=[128, 128],
        conv_padding=1,
        conv_filter_size=3,
        conv_act=ReluActivation(),
        pool_stride=2,
        pool_type=MaxPooling(),
        pool_size=2)

    # Block 3: vgg_num 256-filter convolutions.
    channels = [256] * vgg_num
    tmp = img_conv_group(
        input=tmp,
        conv_num_filter=channels,
        conv_padding=1,
        conv_filter_size=3,
        conv_act=ReluActivation(),
        pool_stride=2,
        pool_type=MaxPooling(),
        pool_size=2)

    # Blocks 4 and 5: vgg_num 512-filter convolutions each.
    channels = [512] * vgg_num
    tmp = img_conv_group(
        input=tmp,
        conv_num_filter=channels,
        conv_padding=1,
        conv_filter_size=3,
        conv_act=ReluActivation(),
        pool_stride=2,
        pool_type=MaxPooling(),
        pool_size=2)
    tmp = img_conv_group(
        input=tmp,
        conv_num_filter=channels,
        conv_padding=1,
        conv_filter_size=3,
        conv_act=ReluActivation(),
        pool_stride=2,
        pool_type=MaxPooling(),
        pool_size=2)

    # Two fully connected layers with dropout, then the softmax classifier.
    tmp = fc_layer(
        input=tmp,
        size=4096,
        act=ReluActivation(),
        layer_attr=ExtraAttr(drop_rate=0.5))

    tmp = fc_layer(
        input=tmp,
        size=4096,
        act=ReluActivation(),
        layer_attr=ExtraAttr(drop_rate=0.5))

    return fc_layer(input=tmp, size=num_class, act=SoftmaxActivation())


# VGG-16 uses 3 convolutions per deep block; VGG-19 uses 4.
if layer_num == 16:
    vgg = vgg_network(3)
elif layer_num == 19:
    vgg = vgg_network(4)
else:
    raise ValueError("Wrong layer number, only 16 and 19 are supported.")

lab = data_layer('label', num_class)
loss = cross_entropy(input=vgg, label=lab)
outputs(loss)
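The config above pulls its input through define_py_data_sources2("train.list", None, module="provider", obj="process", args=args), but the provider module itself is not part of the hunks shown here. For a --job=time benchmark the provider usually only needs to yield synthetic samples of the right shape. A minimal sketch of what such a provider.py could look like, assuming the PyDataProvider2 decorator API; the hook name, pool settings, and random data are illustrative and not taken from this PR:

#!/usr/bin/env python
# provider.py -- illustrative sketch only, not part of this diff.
# Mirrors the args dict built in the trainer config above.
import random

import numpy as np
from paddle.trainer.PyDataProvider2 import *


def init_hook(settings, height, width, color, num_class, **kwargs):
    # args from define_py_data_sources2 arrive here as keyword arguments.
    settings.data_size = height * width * (3 if color else 1)
    settings.num_class = num_class
    settings.input_types = [
        dense_vector(settings.data_size),  # flattened image pixels
        integer_value(settings.num_class)  # class label
    ]


@provider(init_hook=init_hook, min_pool_size=-1)
def process(settings, file_list):
    # Random samples are enough for --job=time benchmarking; nothing is
    # actually read from train.list.
    for _ in range(1024):
        img = np.random.rand(settings.data_size).tolist()
        lab = random.randint(0, settings.num_class - 1)
        yield img, lab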
File diff suppressed because it is too large.
@@ -0,0 +1,50 @@
INCLUDE(ExternalProject)

SET(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)

INCLUDE_DIRECTORIES(${NCCL_SOURCE_DIR}/src/extern_nccl)

if(WITH_DSO)
  # If we use DSO, we do not build nccl, just download the dependencies.
  set(NCCL_BUILD_COMMAND "")
  set(NCCL_INSTALL_COMMAND "")
  set(NCCL_INSTALL_DIR "")
else()
  # Otherwise, we build nccl statically and link it.
  set(NCCL_BUILD_COMMAND "make -j 8")
  set(NCCL_INSTALL_COMMAND "make install")
  SET(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
endif()

ExternalProject_Add(
  extern_nccl
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/NVIDIA/nccl.git"
  GIT_TAG           "v1.3.4-1"
  PREFIX            "${NCCL_SOURCE_DIR}"
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     "${NCCL_BUILD_COMMAND}"
  INSTALL_COMMAND   "${NCCL_INSTALL_COMMAND}"
  INSTALL_DIR       "${NCCL_INSTALL_DIR}"
  TEST_COMMAND      ""
)

if(WITH_DSO)
  if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
    # CMake before 3.3 cannot add dependencies to an INTERFACE library,
    # so compile a dummy source file into a placeholder static library.
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
    add_library(nccl STATIC ${dummyfile})
  else()
    add_library(nccl INTERFACE)
  endif()
else()
  ADD_LIBRARY(nccl STATIC IMPORTED GLOBAL)
  SET_PROPERTY(TARGET nccl PROPERTY IMPORTED_LOCATION
               ${NCCL_INSTALL_DIR}/lib/libnccl.a)
endif()

add_dependencies(nccl extern_nccl)

LIST(APPEND external_project_dependencies nccl)
Some files were not shown because too many files have changed in this diff.