From 2c6cfce70e280ab75ee648c169c5489f0e3c2496 Mon Sep 17 00:00:00 2001 From: wandongdong Date: Tue, 15 Sep 2020 00:33:52 -0700 Subject: [PATCH] fix fp16 bug and add gpu fp16 model to ci --- .../runtime/kernel/opencl/kernel/concat.cc | 1 + .../kernel/opencl/kernel/convolution.cc | 2 +- .../kernel/opencl/kernel/depthwise_conv2d.cc | 9 ++++- .../kernel/opencl/subgraph_opencl_kernel.cc | 13 +++++++ mindspore/lite/test/models_fp16_gpu.cfg | 2 + mindspore/lite/test/models_tflite_gpu.cfg | 1 + mindspore/lite/test/run_benchmark_nets.sh | 37 +++++++++++++++++++ 7 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 mindspore/lite/test/models_fp16_gpu.cfg diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 4f44aa4723..cca3a8b6eb 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ #include +#include #include #include "src/kernel_registry.h" diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc index 5701970747..4ca21a669e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/convolution.cc @@ -69,7 +69,7 @@ int ConvolutionOpenCLKernel::Init() { TILES_X_ = UP_DIV(OW_, 4); TILES_Y_ = UP_DIV(OH_, 4); TILES_XY_ = TILES_X_ * TILES_Y_; - use_winograd_ = UseWinograd4x4To6x6(); + use_winograd_ = UseWinograd4x4To6x6() && use_fp16_; // build kernel if (use_winograd_) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 811f3f5851..0082243c41 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -134,7 +134,14 @@ int DepthwiseConv2dOpenCLKernel::InitBuffer() { size_t up_co_size = C4NUM * CO4 * dtype_size; memset(bias_data_, 0, up_co_size); auto ori_bias = in_tensors_.at(kBiasIndex)->MutableData(); - memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size); + if (is_fp16 && in_tensors_.at(kBiasIndex)->data_type() == kNumberTypeFloat32) { + float16_t *bias_ptr = static_cast<float16_t *>(bias_data_); + for (size_t i = 0; i < in_tensors_.at(kBiasIndex)->ElementsNum(); ++i) { + bias_ptr[i] = static_cast<float16_t>(static_cast<float *>(ori_bias)[i]); + } + } else { + memcpy(bias_data_, ori_bias, out_tensors_[0]->Channel() * dtype_size); + } allocator->UnmapBuffer(bias_data_); } else { MS_ASSERT(in_tensors_.size() == kInputSize1); diff --git a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc index eb24a5df23..d77797a23e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc +++
b/mindspore/lite/src/runtime/kernel/opencl/subgraph_opencl_kernel.cc @@ -56,6 +56,19 @@ int SubGraphOpenCLKernel::GenToFormatOp(const std::vector<lite::Tensor *> &in_te } for (size_t i = 0; i < in_tensors.size(); ++i) { if (in_tensors.at(i)->shape().size() <= 1) { + if (mem_type == OpenCLMemType::IMG) { + for (auto &iv : in_kernels[i]) { + auto tensors = iv->in_tensors(); + tensors.emplace_back(in_tensors.at(i)); + iv->set_in_tensors(tensors); + } + } else { + for (auto &iv : in_kernels[i]) { + auto tensors = iv->out_tensors(); + tensors.emplace_back(in_tensors.at(i)); + iv->set_out_tensors(tensors); + } + } continue; } OpenCLKernel *cur_opencl_op = reinterpret_cast<OpenCLKernel *>(in_kernels[i][0]); diff --git a/mindspore/lite/test/models_fp16_gpu.cfg b/mindspore/lite/test/models_fp16_gpu.cfg new file mode 100644 index 0000000000..7e9823d43e --- /dev/null +++ b/mindspore/lite/test/models_fp16_gpu.cfg @@ -0,0 +1,2 @@ +mobilenet_v1_1.0_224.tflite +mobilenet_v2_1.0_224.tflite diff --git a/mindspore/lite/test/models_tflite_gpu.cfg b/mindspore/lite/test/models_tflite_gpu.cfg index 200f95b60c..7d6d2f31f3 100644 --- a/mindspore/lite/test/models_tflite_gpu.cfg +++ b/mindspore/lite/test/models_tflite_gpu.cfg @@ -1,6 +1,7 @@ mobilenet_v1_1.0_224.tflite mobilenet_v2_1.0_224.tflite resnet.tflite +squeezenet.tflite mtk_AADB_HADB_MBV2_model_fp32.tflite hiai_cn_recognize_modify_padv2.tflite hiai_cv_focusShootOCRModel_08.tflite diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index 0c78e3788a..2edfb2bf4f 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -479,6 +479,42 @@ function Run_arm64() { fi done < ${models_tflite_gpu_config} + # Run GPU fp16 converted models: + while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + echo ${model_name} >> "${run_benchmark_log_file}" + echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt + echo 'export
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}" + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --inDataPath=/data/local/tmp/input_output/input/'${model_name}'.ms.bin --calibDataPath=/data/local/tmp/input_output/output/'${model_name}'.ms.out --warmUpLoopCount=1 --loopCount=1 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt + adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}" + if [ $? = 0 ]; then + run_result='arm64_gpu_fp16: '${model_name}' pass' + echo ${run_result} >> ${run_benchmark_result_file} + else + run_result='arm64_gpu_fp16: '${model_name}' failed' + echo ${run_result} >> ${run_benchmark_result_file} + return 1 + fi + # run benchmark test without calib data + echo ${model_name} >> "${run_benchmark_log_file}" + echo 'cd /data/local/tmp/benchmark_test' > adb_run_cmd.txt + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> "${run_benchmark_log_file}" + echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/local/tmp/benchmark_test;./benchmark --device=GPU --modelPath='${model_name}'.ms --warmUpLoopCount=1 --loopCount=2 --fp16Priority=true --accuracyThreshold=5' >> adb_run_cmd.txt + adb -s ${device_id} shell < adb_run_cmd.txt >> "${run_benchmark_log_file}" + if [ $?
= 0 ]; then + run_result='arm64_gpu_fp16: '${model_name}' pass' + echo ${run_result} >> ${run_benchmark_result_file} + else + run_result='arm64_gpu_fp16: '${model_name}' failed' + echo ${run_result} >> ${run_benchmark_result_file} + return 1 + fi + #sleep 1 + done < ${models_fp16_gpu_config} + # Run mindir converted models: while read line; do model_name=${line} @@ -574,6 +610,7 @@ models_onnx_config=${basepath}/models_onnx.cfg models_fp16_config=${basepath}/models_fp16.cfg models_mindspore_config=${basepath}/models_mindspore.cfg models_tflite_gpu_config=${basepath}/models_tflite_gpu.cfg +models_fp16_gpu_config=${basepath}/models_fp16_gpu.cfg ms_models_path=${basepath}/ms_models