Add op benchmark ci pipeline in Paddle repo (#28692)
parent 4b05a8be88
commit c91bb084f4
@@ -0,0 +1,120 @@
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import json
import logging
import argparse


def check_path_exists(path):
    """Assert that a file/directory exists."""
    assert os.path.exists(path), "%s does not exist." % path


def parse_log_file(log_file):
    """Load one case result from a log file."""
    check_path_exists(log_file)

    result = None
    with open(log_file) as f:
        # Scan lines from the end; the case result is the last JSON line.
        for line in f.read().strip().split('\n')[::-1]:
            try:
                result = json.loads(line)
                return result
            except ValueError:
                pass  # skip lines that are not JSON

    assert result is not None, "Failed to parse log file %s." % log_file
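
# NOTE: a minimal sketch of the JSON line that parse_log_file() expects,
# inferred from the fields read elsewhere in this script; the real lines are
# produced by the benchmark repo and the values here are illustrative only:
#
#   {"name": "abs", "speed": {"gpu_time": 0.0123, "total": 0.0456},
#    "backward": false, "parameters": "x shape: [16, 1000]"}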


def load_benchmark_result_from_logs_dir(logs_dir):
    """Load benchmark results from a logs directory."""
    check_path_exists(logs_dir)

    log_file_path = lambda log_file: os.path.join(logs_dir, log_file)
    result_lambda = lambda log_file: (log_file, parse_log_file(log_file_path(log_file)))

    return dict(map(result_lambda, os.listdir(logs_dir)))


def compare_benchmark_result(develop_result, pr_result):
    """Compare the differences between develop and PR results."""
    develop_speed = develop_result.get("speed")
    pr_speed = pr_result.get("speed")

    assert type(develop_speed) == type(
        pr_speed), "The types of comparison results need to be consistent."

    if isinstance(develop_speed, dict) and isinstance(pr_speed, dict):
        pr_gpu_time = pr_speed.get("gpu_time")
        develop_gpu_time = develop_speed.get("gpu_time")
        gpu_time_diff = (pr_gpu_time - develop_gpu_time) / develop_gpu_time

        pr_total_time = pr_speed.get("total")
        develop_total_time = develop_speed.get("total")
        total_time_diff = (
            pr_total_time - develop_total_time) / develop_total_time

        # TODO(Avin0323): Print all info for making the alert rule.
        logging.info("------ OP: %s ------" % pr_result.get("name"))
        logging.info("GPU time change: %.5f%% (develop: %.7f -> PR: %.7f)" %
                     (gpu_time_diff * 100, develop_gpu_time, pr_gpu_time))
        logging.info("Total time change: %.5f%% (develop: %.7f -> PR: %.7f)" %
                     (total_time_diff * 100, develop_total_time, pr_total_time))
        logging.info("backward: %s" % pr_result.get("backward"))
        logging.info("parameters:")
        for line in pr_result.get("parameters").strip().split("\n"):
            logging.info("\t%s" % line)
    else:
        # TODO(Avin0323): Accuracy comparison still needs to be added.
        pass

    return True


if __name__ == "__main__":
    """Load results from the log directories and compare the differences."""
    logging.basicConfig(
        level=logging.INFO,
        format="[%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s")

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--develop_logs_dir",
        type=str,
        required=True,
        help="Specify the benchmark result directory of the develop branch.")
    parser.add_argument(
        "--pr_logs_dir",
        type=str,
        required=True,
        help="Specify the benchmark result directory of the PR branch.")
    args = parser.parse_args()

    develop_result_dict = load_benchmark_result_from_logs_dir(
        args.develop_logs_dir)

    check_path_exists(args.pr_logs_dir)
    for log_file in os.listdir(args.pr_logs_dir):
        develop_result = develop_result_dict.get(log_file)
        pr_result = parse_log_file(os.path.join(args.pr_logs_dir, log_file))
        if develop_result is None or pr_result is None:
            continue
        compare_benchmark_result(develop_result, pr_result)
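
# Example invocation (a sketch mirroring how the CI script below calls this
# tool from summary_problems; the log directory paths are illustrative):
#
#   python tools/check_op_benchmark_result.py \
#       --develop_logs_dir $(pwd)/logs-develop \
#       --pr_logs_dir $(pwd)/logs-test_pr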
@@ -0,0 +1,187 @@
#!/bin/bash

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set +ex

[ -z "$PADDLE_ROOT" ] && PADDLE_ROOT=$(cd $(dirname ${BASH_SOURCE[0]})/.. && pwd)

# Paddle repo file name -> op name
declare -A PADDLE_FILENAME_OP_MAP
PADDLE_FILENAME_OP_MAP=(
    ["arg_min_max_op_base.h"]="arg_min arg_max"
    ["arg_min_max_op_base.cu.h"]="arg_min arg_max"
    ["activation_op.cu"]="leaky_relu elu sqrt square pow exp abs log"
    ["activation_op.h"]="relu leaky_relu elu sqrt square pow exp abs log"
    ["activation_op.cc"]="relu leaky_relu elu sqrt square pow exp abs log"
)

# Benchmark repo API name -> op name
declare -A BENCHMARK_APINAME_OP_MAP
BENCHMARK_APINAME_OP_MAP=(
    ["argmin"]="arg_min"
    ["argmax"]="arg_max"
)

# ops that will run the benchmark test
declare -A CHANGE_OP_MAP

# ops that the benchmark repo covers
declare -A BENCHMARK_OP_MAP

# ops that the benchmark repo is missing
declare -A BENCHMARK_MISS_OP_MAP

function LOG {
    echo "[$0:${BASH_LINENO[0]}] $*" >&2
}

# Load ops that will run the benchmark test
function load_CHANGE_OP_MAP {
    local op_name change_file change_file_name
    for change_file in $(git diff --name-only origin/develop)
    do
        # only look at changes under the operators directory
        [[ "$change_file" =~ "paddle/fluid/operators/" ]] || continue
        LOG "[INFO] Found \"${change_file}\" changed."
        change_file_name=${change_file#*paddle/fluid/operators/}
        if [ -n "${PADDLE_FILENAME_OP_MAP[$change_file_name]}" ]
        then
            for op_name in ${PADDLE_FILENAME_OP_MAP[$change_file_name]}
            do
                LOG "[INFO] Load op: \"${op_name}\"."
                CHANGE_OP_MAP[${op_name}]="dummy"
            done
        else
            LOG "[INFO] Load op: \"${change_file_name%_op*}\"."
            CHANGE_OP_MAP[${change_file_name%_op*}]="dummy"
        fi
    done
    [ ${#CHANGE_OP_MAP[*]} -eq 0 ] && LOG "[INFO] No op to test, skip this CI." && exit 0
}
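
# For example (hypothetical file name, to illustrate the fallback above): a
# change to paddle/fluid/operators/foo_op.cc is not listed in
# PADDLE_FILENAME_OP_MAP, so ${change_file_name%_op*} strips "_op.cc" and
# registers op "foo"; a change to activation_op.cc instead registers every op
# listed for that file in the map.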

# Clone the benchmark repo and collect the API info it provides
function prepare_benchmark_environment {
    LOG "[INFO] Clone benchmark repo ..."
    git clone https://github.com/PaddlePaddle/benchmark.git
    [ $? -ne 0 ] && LOG "[FATAL] Clone benchmark repo failed." && exit -1
    LOG "[INFO] Collect api info ..."
    python benchmark/api/deploy/collect_api_info.py \
        --test_module_name tests_v2 \
        --info_file api_info.txt >& 2
    [ $? -ne 0 ] && LOG "[FATAL] Collect api info failed." && exit -1
}

# Load benchmark settings for the changed ops that the benchmark repo covers
function load_BENCHMARK_OP_MAP {
    local line op_name api_name
    prepare_benchmark_environment
    for line in $(cat api_info.txt)
    do
        api_name=${line%%,*}
        if [ -n "${BENCHMARK_APINAME_OP_MAP[$api_name]}" ]
        then
            op_name=${BENCHMARK_APINAME_OP_MAP[$api_name]}
        else
            op_name=$api_name
        fi
        if [ -n "${CHANGE_OP_MAP[$op_name]}" ]
        then
            LOG "[INFO] Load benchmark settings with op \"${op_name}\"."
            BENCHMARK_OP_MAP[$op_name]=$line
        fi
    done
}

# compile and install paddlepaddle
function compile_install_paddlepaddle {
    LOG "[DEBUG] Compiling install package ..."
    export WITH_GPU=ON
    export WITH_AVX=ON
    export WITH_MKL=ON
    export RUN_TEST=OFF
    export WITH_PYTHON=ON
    export WITH_TESTING=OFF
    export BUILD_TYPE=Release
    export WITH_DISTRIBUTE=OFF
    export PYTHON_ABI=cp37-cp37m
    export CMAKE_BUILD_TYPE=Release
    [ -d build ] && rm -rf build
    bash paddle/scripts/paddle_build.sh build
    [ $? -ne 0 ] && LOG "[FATAL] Compile failed." && exit 7
    LOG "[DEBUG] Uninstall Paddle ..."
    pip uninstall -y paddlepaddle paddlepaddle_gpu
    LOG "[DEBUG] Install Paddle ..."
    pip install build/python/dist/paddlepaddle_gpu-0.0.0-cp37-cp37m-linux_x86_64.whl
}

# run op benchmark test
function run_op_benchmark_test {
    local logs_dir op_name branch_name api_info_file
    api_info_file="$(pwd)/api_info.txt"
    [ -f "$api_info_file" ] && rm -f $api_info_file
    for api_info in ${BENCHMARK_OP_MAP[*]}
    do
        echo "$api_info" >> $api_info_file
    done
    LOG "[INFO] Run benchmark test on develop and test_pr branches ..."
    for branch_name in "develop" "test_pr"
    do
        git checkout $branch_name
        [ $? -ne 0 ] && LOG "[FATAL] Missing branch ${branch_name}." && exit 7
        LOG "[INFO] Now branch name is ${branch_name}."
        compile_install_paddlepaddle
        logs_dir="$(pwd)/logs-${branch_name}"
        [ -d $logs_dir ] && rm -rf $logs_dir/* || mkdir -p $logs_dir
        [ -z "$VISIBLE_DEVICES" ] && export VISIBLE_DEVICES=0
        pushd benchmark/api > /dev/null
        bash deploy/main_control.sh tests_v2 \
            tests_v2/configs \
            $logs_dir \
            $VISIBLE_DEVICES \
            "gpu" \
            "speed" \
            $api_info_file \
            "paddle"
        popd > /dev/null
    done
}

# Diff the benchmark results and report ops missing from the benchmark repo
function summary_problems {
    local op_name
    python ${PADDLE_ROOT}/tools/check_op_benchmark_result.py \
        --develop_logs_dir $(pwd)/logs-develop \
        --pr_logs_dir $(pwd)/logs-test_pr
    for op_name in ${!CHANGE_OP_MAP[@]}
    do
        if [ -z "${BENCHMARK_OP_MAP[$op_name]}" ]
        then
            LOG "[WARNING] Missing test script of \"${op_name}\" in benchmark."
        fi
    done
}

function main {
    LOG "[INFO] Start running op benchmark test ..."
    load_CHANGE_OP_MAP
    load_BENCHMARK_OP_MAP
    run_op_benchmark_test
    summary_problems
    LOG "[INFO] Op benchmark run succeeded, no errors found!"
    exit 0
}

main