!9765 remove serving in mindspore repo

From: @xu-yfei
Reviewed-by: @zhoufeng54,@kisnwang
Signed-off-by: @kisnwang
pull/9765/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 058fbd2d1f

@ -81,9 +81,4 @@ if (ENABLE_TESTCASES)
add_subdirectory(tests)
endif()
if (ENABLE_SERVING)
add_subdirectory(serving)
add_subdirectory(serving/example/cpp_client)
endif()
include(cmake/package.cmake)

@ -54,7 +54,6 @@ usage()
echo " -V Specify the device version, if -e gpu, default CUDA 10.1, if -e ascend, default Ascend 910"
echo " -I Enable compiling mindspore lite for arm64, arm32 or x86_64, default disable mindspore lite compilation"
echo " -K Compile with AKG, default on"
echo " -s Enable serving module, default off"
echo " -B Enable debugger, default on"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
@ -105,7 +104,6 @@ checkopts()
SUPPORT_TRAIN="off"
USE_GLOG="on"
ENABLE_AKG="on"
ENABLE_SERVING="off"
ENABLE_ACL="off"
ENABLE_DEBUGGER="on"
ENABLE_IBVERBS="off"
@ -123,7 +121,7 @@ checkopts()
DEVICE=""
ENABLE_NPU="off"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:swB:En:T:A:C:o:S:k:W:' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:D:zM:V:K:B:En:T:A:C:o:S:k:W:' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@ -273,16 +271,6 @@ checkopts()
ENABLE_AKG="on"
echo "enable compile with akg"
;;
s)
ENABLE_SERVING="on"
echo "enable serving"
;;
w)
ENABLE_SERVING="on"
echo "enable serving"
ENABLE_ACL="on"
echo "enable acl"
;;
B)
check_on_off $OPTARG B
ENABLE_DEBUGGER="$OPTARG"
@ -366,12 +354,10 @@ checkopts()
DEVICE_VERSION=910
fi
if [[ "X$DEVICE_VERSION" == "X310" ]]; then
ENABLE_SERVING="on"
ENABLE_ACL="on"
elif [[ "X$DEVICE_VERSION" == "X910" ]]; then
ENABLE_D="on"
ENABLE_CPU="on"
ENABLE_SERVING="on"
else
echo "Invalid value ${DEVICE_VERSION} for option -V"
usage
@ -467,9 +453,6 @@ build_mindspore()
if [[ "X$ENABLE_AKG" = "Xon" ]] && [[ "X$ENABLE_D" = "Xon" || "X$ENABLE_GPU" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_AKG=ON"
fi
if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
fi
if [[ "X$ENABLE_ACL" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ACL=ON"
fi

@ -88,7 +88,7 @@ if (ENABLE_MINDDATA)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sentencepiece.cmake)
endif()
if (ENABLE_MINDDATA OR ENABLE_SERVING)
if (ENABLE_MINDDATA)
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/jpeg_turbo.cmake)
endif()

@ -119,7 +119,7 @@ if(ENABLE_DEBUGGER)
add_compile_definitions(ENABLE_DEBUGGER)
endif()
if (ENABLE_DEBUGGER OR ENABLE_SERVING OR ENABLE_TESTCASES)
if (ENABLE_DEBUGGER OR ENABLE_TESTCASES)
set(MS_BUILD_GRPC ON)
endif()
if (ENABLE_MINDDATA AND NOT CMAKE_SYSTEM_NAME MATCHES "Windows")

@ -202,7 +202,7 @@ if (ENABLE_CPU AND (ENABLE_D OR ENABLE_GPU))
)
endif()
if (ENABLE_SERVING OR ENABLE_TESTCASES)
if (ENABLE_TESTCASES)
file(GLOB_RECURSE LIBEVENT_LIB_LIST
${libevent_LIBPATH}/libevent*
${libevent_LIBPATH}/libevent_pthreads*
@ -336,29 +336,3 @@ install(
COMPONENT mindspore
)
if (ENABLE_SERVING)
install(
TARGETS ms_serving
DESTINATION ${INSTALL_BASE_DIR}
COMPONENT mindspore
)
install(
FILES ${CMAKE_SOURCE_DIR}/build/mindspore/serving/ms_service_pb2.py
${CMAKE_SOURCE_DIR}/build/mindspore/serving/ms_service_pb2_grpc.py
DESTINATION ${INSTALL_PY_DIR}
COMPONENT mindspore
)
install(
TARGETS inference
DESTINATION ${INSTALL_LIB_DIR}
COMPONENT mindspore
)
install(
FILES ${LIBEVENT_LIB_LIST}
DESTINATION ${INSTALL_LIB_DIR}
COMPONENT mindspore
)
endif ()

@ -350,44 +350,9 @@ if (ENABLE_MINDDATA)
add_subdirectory(minddata/dataset)
endif ()
# build inference
set(LOAD_MINDIR_SRC
${CMAKE_SOURCE_DIR}/mindspore/core/load_mindir/load_model.cc
${CMAKE_SOURCE_DIR}/mindspore/core/load_mindir/anf_model_parser.cc
)
add_library(inference SHARED
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc
${LOAD_MINDIR_SRC}
)
set_target_properties(inference PROPERTIES INSTALL_RPATH ${MINDSPORE_RPATH})
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
-Wl,-force_load mindspore proto_input -Wl,-noall_load mindspore_gvar)
else()
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}
-Wl,--whole-archive mindspore proto_input -Wl,--no-whole-archive mindspore_gvar)
endif()
if (ENABLE_D)
find_library(adump_server libadump_server.a ${ASCEND_RUNTIME_PATH} ${ASCEND_TOOLKIT_RUNTIME_PATH})
target_link_libraries(_c_expression PRIVATE ${adump_server})
target_link_libraries(inference PRIVATE ${adump_server} ms_profile)
endif()
if (ENABLE_CPU)
target_link_libraries(inference PRIVATE mindspore::dnnl mindspore::mkldnn)
endif ()
if (USE_GLOG)
target_link_libraries(inference PRIVATE mindspore::glog)
endif()
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
target_link_options(inference PRIVATE -Wl,-init,common_log_init)
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
set_target_properties(inference PROPERTIES MACOSX_RPATH ON)
endif ()
add_subdirectory(cxx_api)

File diff suppressed because it is too large

@ -1,65 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_SESSION_SESSION_H
#define MINDSPORE_CCSRC_SESSION_SESSION_H
#include <vector>
#include <string>
#include <unordered_map>
#include <utility>
#include <memory>
#include <map>
#include "backend/session/session_basic.h"
#include "ir/anf.h"
#include "include/inference.h"
#ifdef ENABLE_D
#include "runtime/context.h"
#endif
namespace mindspore {
namespace inference {
class MSInferSession : public InferSession {
public:
MSInferSession();
~MSInferSession();
Status InitEnv(const std::string &device_type, uint32_t device_id) override;
Status FinalizeEnv() override;
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
Status UnloadModel(uint32_t model_id) override;
Status ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override;
Status GetModelInputsInfo(uint32_t graph_id, std::vector<inference::InferTensor> *tensor_list) const override;
private:
std::shared_ptr<session::SessionBasic> session_impl_ = nullptr;
std::vector<uint32_t> graph_id_;
std::string device_type_;
int32_t device_id_ = 0;
#ifdef ENABLE_D
rtContext_t context_ = nullptr;
#endif
static void RegAllOp();
string AjustTargetName(const std::string &device);
Status CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr, uint32_t &model_id);
Status CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const;
std::vector<tensor::TensorPtr> RunGraph(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs);
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_CCSRC_SESSION_SESSION_H

@ -1,125 +0,0 @@
find_package(Threads REQUIRED)
# This branch assumes that gRPC and all its dependencies are already installed
# on this system, so they can be located by find_package().
# Find Protobuf installation
# Looks for protobuf-config.cmake file installed by Protobuf's cmake installation.
#set(protobuf_MODULE_COMPATIBLE TRUE)
#find_package(Protobuf CONFIG REQUIRED)
#message(STATUS "Using protobuf ${protobuf_VERSION}")
add_library(protobuf::libprotobuf ALIAS protobuf::protobuf)
add_executable(protobuf::libprotoc ALIAS protobuf::protoc)
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
if (CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else ()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif ()
# Find gRPC installation
# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
if (EXISTS ${grpc_ROOT}/lib64)
set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc")
else ()
set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc")
endif ()
message("serving using grpc_DIR : " ${gPRC_DIR})
find_package(gRPC CONFIG REQUIRED)
message(STATUS "Using gRPC ${gRPC_VERSION}")
set(_GRPC_GRPCPP gRPC::grpc++)
set(_REFLECTION gRPC::grpc++_reflection)
if (CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
find_program(_GRPC_PYTHON_PLUGIN_EXECUTABLE grpc_python_plugin)
else ()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
set(_GRPC_PYTHON_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_python_plugin>)
endif ()
# Proto file
get_filename_component(hw_proto "ms_service.proto" ABSOLUTE)
get_filename_component(hw_proto_path "${hw_proto}" PATH)
# Generated sources
set(hw_proto_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.cc")
set(hw_proto_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.pb.h")
set(hw_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.cc")
set(hw_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/ms_service.grpc.pb.h")
set(hw_py_pb2 "${CMAKE_CURRENT_BINARY_DIR}/ms_service_pb2.py")
set(hw_py_pb2_grpc "${CMAKE_CURRENT_BINARY_DIR}/ms_service_pb2_grpc.py")
add_custom_command(
OUTPUT "${hw_proto_srcs}" "${hw_proto_hdrs}" "${hw_grpc_srcs}" "${hw_grpc_hdrs}" "${hw_py_pb2}" "${hw_py_pb2_grpc}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_CPP_PLUGIN_EXECUTABLE}"
"${hw_proto}"
COMMAND ${_PROTOBUF_PROTOC}
ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}"
--python_out "${CMAKE_CURRENT_BINARY_DIR}"
-I "${hw_proto_path}"
--plugin=protoc-gen-grpc="${_GRPC_PYTHON_PLUGIN_EXECUTABLE}"
"${hw_proto}"
DEPENDS "${hw_proto}")
# Include generated *.pb.h files
include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/core"
"${PROJECT_SOURCE_DIR}/mindspore/ccsrc" "${PROJECT_SOURCE_DIR}/mindspore/core")
file(GLOB_RECURSE CORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
"core/*.cc" "core/util/*.cc" "core/version_control/*.cc")
list(APPEND SERVING_SRC "main.cc" ${hw_proto_srcs} ${hw_grpc_srcs} ${CORE_SRC_LIST})
if (ENABLE_ACL)
if (DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
else ()
set(ASCEND_PATH /usr/local/Ascend)
endif ()
set(ACL_LIB_DIR ${ASCEND_PATH}/acllib/)
set(ATLAS_ACL_LIB_DIR ${ASCEND_PATH}/ascend-toolkit/latest/acllib)
MESSAGE("hisi acl lib dir " ${ACL_LIB_DIR} " ,atlas acl lib dir " ${ATLAS_ACL_LIB_DIR})
include_directories(${ACL_LIB_DIR}/include/)
include_directories(${ATLAS_ACL_LIB_DIR}/include/)
file(GLOB_RECURSE ACL_SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "acl/*.cc")
list(APPEND SERVING_SRC ${ACL_SESSION_SRC_LIST})
endif ()
include_directories(${CMAKE_BINARY_DIR})
add_executable(ms_serving ${SERVING_SRC})
#libevent
target_link_libraries(ms_serving mindspore::event mindspore::event_pthreads)
target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread)
include(CheckPIESupported)
check_pie_supported()
set_property(TARGET ms_serving PROPERTY POSITION_INDEPENDENT_CODE TRUE)
if (ENABLE_D)
add_compile_definitions(ENABLE_D)
target_link_libraries(ms_serving ${RUNTIME_LIB})
endif ()
if (ENABLE_ACL)
add_compile_definitions(ENABLE_ACL)
add_compile_definitions(ENABLE_DVPP_INTERFACE)
find_library(acl libascendcl.so ${ACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)
find_library(acl_retr libacl_retr.so ${ACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)
find_library(acl_cblas libacl_cblas.so ${ACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)
find_library(acl_dvpp libacl_dvpp.so ${ACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)
find_library(acl_runtime libruntime.so ${ACL_LIB_DIR}/lib64 ${ATLAS_ACL_LIB_DIR}/lib64)
target_link_libraries(ms_serving ${acl} ${acl_retr} ${acl_cblas} ${acl_dvpp} ${acl_runtime})
target_link_libraries(ms_serving jpeg_turbo::jpeg securec)
else ()
target_link_libraries(ms_serving inference mindspore_gvar)
endif ()

@ -1,150 +0,0 @@
# MindSpore-based Inference Service Deployment
<!-- TOC -->
- [MindSpore-based Inference Service Deployment](#mindspore-based-inference-service-deployment)
- [Overview](#overview)
- [Starting Serving](#starting-serving)
- [Application Example](#application-example)
- [Exporting Model](#exporting-model)
- [Starting Serving Inference](#starting-serving-inference)
- [Client Samples](#client-samples)
- [Python Client Sample](#python-client-sample)
- [C++ Client Sample](#cpp-client-sample)
<!-- /TOC -->
<a href="https://gitee.com/mindspore/docs/blob/master/tutorials/source_en/advanced_use/serving.md" target="_blank"><img src="../_static/logo_source.png"></a>
## Overview
MindSpore Serving is a lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training using MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model. Currently, only Ascend 910 is supported.
## Starting Serving
After MindSpore is installed using `pip`, the Serving executable program is stored in `/{your python path}/lib/python3.7/site-packages/mindspore/ms_serving`.
Run the following command to start Serving:
```bash
ms_serving [--help] [--model_path <MODEL_PATH>] [--model_name <MODEL_NAME>]
[--port <PORT>] [--device_id <DEVICE_ID>]
```
Parameters are described as follows:
|Parameter|Attribute|Function|Parameter Type|Default Value|Value Range|
|---|---|---|---|---|---|
|`--help`|Optional|Displays the help information about the startup command. |-|-|-|
|`--model_path=<MODEL_PATH>`|Mandatory|Path for storing the model to be loaded. |String|Null|-|
|`--model_name=<MODEL_NAME>`|Mandatory|Name of the model file to be loaded. |String|Null|-|
|`--port=<PORT>`|Optional|Specifies the external Serving port number. |Integer|5500|1 to 65535|
|`--device_id=<DEVICE_ID>`|Optional|Specifies the device ID to be used.|Integer|0|0 to 7|
> Before running the startup command, add the path `/{your python path}/lib:/{your python path}/lib/python3.7/site-packages/mindspore/lib` to the environment variable `LD_LIBRARY_PATH`.
## Application Example
The following uses a simple network as an example to describe how to use MindSpore Serving.
### Exporting Model
Use [add_model.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model.
```python
python add_model.py
```
Execute the script to generate the `tensor_add.mindir` file. The input of the model is two two-dimensional tensors with shape [2,2], and the output is the sum of the two input tensors.
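The linked `add_model.py` is the authoritative export script. For orientation only, a minimal sketch of such a script might look like the following; the network class, operator choice, and `mindspore.export` call here are illustrative assumptions rather than the exact contents of `add_model.py`:
```python
# Hypothetical sketch of an Add-only export script; the authoritative version is
# serving/example/export_model/add_model.py linked above.
import numpy as np

import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops


class Net(nn.Cell):
    """A network containing only an Add operator."""

    def __init__(self):
        super(Net, self).__init__()
        self.add = ops.Add()  # named TensorAdd in some older MindSpore releases

    def construct(self, x, y):
        return self.add(x, y)


if __name__ == "__main__":
    net = Net()
    # Two [2, 2] float32 inputs, matching the tensor_add.mindir description above.
    x = ms.Tensor(np.ones([2, 2]).astype(np.float32))
    y = ms.Tensor(np.ones([2, 2]).astype(np.float32))
    # Export the inference model in MindIR format as tensor_add.mindir.
    ms.export(net, x, y, file_name="tensor_add", file_format="MINDIR")
```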
### Starting Serving Inference
```bash
ms_serving --model_path={model directory} --model_name=tensor_add.mindir
```
If the server prints the `MS Serving Listening on 0.0.0.0:5500` log, the Serving has loaded the inference model.
### Client Samples
#### <span name="python-client-sample">Python Client Sample</span>
Obtain [ms_client.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/python_client/ms_client.py) and start the Python client.
```bash
python ms_client.py
```
If the following information is displayed, the Serving has correctly executed the inference of the Add network.
```
ms client received:
[[2. 2.]
[2. 2.]]
```
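For reference, a minimal Python client along these lines could be written against the generated `ms_service_pb2.py` and `ms_service_pb2_grpc.py` modules (produced by the serving CMake rules in this change). The stub class name and message field access below follow standard gRPC code generation and the C++ snippets later in this document; treat this as a hedged sketch and use the linked `ms_client.py` as the reference implementation:
```python
# Hypothetical minimal client sketch; the authoritative version is
# serving/example/python_client/ms_client.py linked above.
# Assumes the protoc/gRPC-generated modules ms_service_pb2 and ms_service_pb2_grpc
# are on the Python path.
import grpc
import numpy as np

import ms_service_pb2
import ms_service_pb2_grpc


def run(target="localhost:5500"):
    channel = grpc.insecure_channel(target)
    stub = ms_service_pb2_grpc.MSServiceStub(channel)

    request = ms_service_pb2.PredictRequest()
    # The Add model expects two float32 inputs of shape [2, 2].
    for _ in range(2):
        tensor = request.data.add()
        tensor.tensor_shape.dims.extend([2, 2])
        tensor.tensor_type = ms_service_pb2.MS_FLOAT32
        tensor.data = np.ones([2, 2], np.float32).tobytes()

    reply = stub.Predict(request)
    print("ms client received:")
    for result in reply.result:
        print(np.frombuffer(result.data, np.float32).reshape(2, 2))


if __name__ == "__main__":
    run()
```
Start the server first (`ms_serving --model_path={model directory} --model_name=tensor_add.mindir`) so that the client has a listening Serving endpoint at the target address.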
#### <span name="cpp-client-sample">C++ Client Sample</span>
1. Obtain an executable client sample program.
Download the [MindSpore source code](https://gitee.com/mindspore/mindspore). You can use either of the following methods to compile and obtain the client sample program:
+ When MindSpore is compiled using the source code, the Serving C++ client sample program is generated. You can find the `ms_client` executable program in the `build/mindspore/serving/example/cpp_client` directory.
+ Independent compilation
Preinstall [gRPC](https://gRPC.io).
Run the following command in the MindSpore source code path to compile a client sample program:
```bash
cd mindspore/serving/example/cpp_client
mkdir build && cd build
cmake -D GRPC_PATH={grpc_install_dir} ..
make
```
In the preceding command, `{grpc_install_dir}` indicates the gRPC installation path. Replace it with the actual gRPC installation path.
2. Start the client.
Execute `ms_client` to send an inference request to the Serving.
```bash
./ms_client --target=localhost:5500
```
If the following information is displayed, the Serving has correctly executed the inference of the Add network.
```
Compute [[1, 2], [3, 4]] + [[1, 2], [3, 4]]
Add result is 2 4 6 8
client received: RPC OK
```
The client code consists of the following parts:
1. Implement the client based on MSService::Stub and create a client instance.
```
class MSClient {
public:
explicit MSClient(std::shared_ptr<Channel> channel) : stub_(MSService::NewStub(channel)) {}
private:
std::unique_ptr<MSService::Stub> stub_;
};

MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials()));
```
2. Build the request input parameter `Request`, output parameter `Reply`, and gRPC client `Context` based on the actual network input.
```
PredictRequest request;
PredictReply reply;
ClientContext context;
//construct tensor
Tensor data;
//set shape
TensorShape shape;
shape.add_dims(4);
*data.mutable_tensor_shape() = shape;
//set type
data.set_tensor_type(ms_serving::MS_FLOAT32);
std::vector<float> input_data{1, 2, 3, 4};
//set datas
data.set_data(input_data.data(), input_data.size());
//add tensor to request
*request.add_data() = data;
*request.add_data() = data;
```
3. Call the gRPC API to communicate with the Serving that has been started, and obtain the return value.
```
Status status = stub_->Predict(&context, request, &reply);
```
For details about the complete code, see [ms_client](https://gitee.com/mindspore/mindspore/blob/master/serving/example/cpp_client/ms_client.cc).

@ -1,151 +0,0 @@
# MindSpore-based Inference Service Deployment
<!-- TOC -->
- [MindSpore-based Inference Service Deployment](#mindspore-based-inference-service-deployment)
- [Overview](#overview)
- [Starting Serving](#starting-serving)
- [Application Example](#application-example)
- [Exporting Model](#exporting-model)
- [Starting Serving Inference](#starting-serving-inference)
- [Client Samples](#client-samples)
- [Python Client Sample](#python-client-sample)
- [C++ Client Sample](#cpp-client-sample)
<!-- /TOC -->
<a href="https://gitee.com/mindspore/docs/blob/master/tutorials/source_zh_cn/advanced_use/serving.md" target="_blank"><img src="../_static/logo_source.png"></a>
## Overview
MindSpore Serving is a lightweight and high-performance service module that helps MindSpore developers efficiently deploy online inference services in the production environment. After completing model training using MindSpore, you can export the MindSpore model and use MindSpore Serving to create an inference service for the model. Currently, only Ascend 910 is supported.
## Starting Serving
After MindSpore is installed using `pip`, the Serving executable program is stored in `/{your python path}/lib/python3.7/site-packages/mindspore/ms_serving`.
Run the following command to start Serving:
```bash
ms_serving [--help] [--model_path <MODEL_PATH>] [--model_name <MODEL_NAME>]
[--port <PORT>] [--device_id <DEVICE_ID>]
```
Parameters are described as follows:
|Parameter|Attribute|Function|Parameter Type|Default Value|Value Range|
|---|---|---|---|---|---|
|`--help`|Optional|Displays the help information about the startup command. |-|-|-|
|`--model_path=<MODEL_PATH>`|Mandatory|Path for storing the model to be loaded. |String|Null|-|
|`--model_name=<MODEL_NAME>`|Mandatory|Name of the model file to be loaded. |String|Null|-|
|`--port=<PORT>`|Optional|Specifies the external Serving port number. |Integer|5500|1 to 65535|
|`--device_id=<DEVICE_ID>`|Optional|Specifies the device ID to be used.|Integer|0|0 to 7|
> Before running the startup command, add the path `/{your python path}/lib:/{your python path}/lib/python3.7/site-packages/mindspore/lib` to the environment variable `LD_LIBRARY_PATH`.
## Application Example
The following uses a simple network as an example to describe how to use MindSpore Serving.
### Exporting Model
Use [add_model.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/export_model/add_model.py) to build a network with only the Add operator and export the MindSpore inference deployment model.
```python
python add_model.py
```
Execute the script to generate the `tensor_add.mindir` file. The input of the model is two two-dimensional tensors with shape [2,2], and the output is the sum of the two input tensors.
### Starting Serving Inference
```bash
ms_serving --model_path={model directory} --model_name=tensor_add.mindir
```
If the server prints the `MS Serving Listening on 0.0.0.0:5500` log, the Serving has loaded the inference model.
### Client Samples
#### <span name="python-client-sample">Python Client Sample</span>
Obtain [ms_client.py](https://gitee.com/mindspore/mindspore/blob/master/serving/example/python_client/ms_client.py) and start the Python client.
```bash
python ms_client.py
```
If the following information is displayed, the Serving has correctly executed the inference of the Add network.
```
ms client received:
[[2. 2.]
[2. 2.]]
```
#### <span name="cpp客户端示例">C++客户端示例</span>
1. 获取客户端示例执行程序
首先需要下载[MindSpore源码](https://gitee.com/mindspore/mindspore)。有两种方式编译并获取客户端示例程序:
+ 从源码编译MindSpore时候将会编译产生Serving C++客户端示例程序,可在`build/mindspore/serving/example/cpp_client`目录下找到`ms_client`可执行程序。
+ 独立编译:
需要先预装[gRPC](https://gRPC.io)。
然后在MindSpore源码路径中执行如下命令编译一个客户端示例程序。
```bash
cd mindspore/serving/example/cpp_client
mkdir build && cd build
cmake -D GRPC_PATH={grpc_install_dir} ..
make
```
In the preceding command, `{grpc_install_dir}` indicates the gRPC installation path. Replace it with the actual gRPC installation path.
2. Start the client.
Execute `ms_client` to send an inference request to the Serving.
```bash
./ms_client --target=localhost:5500
```
If the following information is displayed, the Serving has correctly executed the inference of the Add network.
```
Compute [[1, 2], [3, 4]] + [[1, 2], [3, 4]]
Add result is 2 4 6 8
client received: RPC OK
```
The client code consists of the following parts:
1. Implement the client based on MSService::Stub and create a client instance.
```
class MSClient {
public:
explicit MSClient(std::shared_ptr<Channel> channel) : stub_(MSService::NewStub(channel)) {}
private:
std::unique_ptr<MSService::Stub> stub_;
};

MSClient client(grpc::CreateChannel(target_str, grpc::InsecureChannelCredentials()));
```
2. Build the request input parameter `Request`, output parameter `Reply`, and gRPC client `Context` based on the actual network input.
```
PredictRequest request;
PredictReply reply;
ClientContext context;
//construct tensor
Tensor data;
//set shape
TensorShape shape;
shape.add_dims(4);
*data.mutable_tensor_shape() = shape;
//set type
data.set_tensor_type(ms_serving::MS_FLOAT32);
std::vector<float> input_data{1, 2, 3, 4};
//set datas
data.set_data(input_data.data(), input_data.size());
//add tensor to request
*request.add_data() = data;
*request.add_data() = data;
```
3. Call the gRPC API to communicate with the Serving that has been started, and obtain the return value.
```
Status status = stub_->Predict(&context, request, &reply);
```
For details about the complete code, see [ms_client](https://gitee.com/mindspore/mindspore/blob/master/serving/example/cpp_client/ms_client.cc).

@ -1,243 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include <algorithm>
#include <fstream>
#include "serving/acl/acl_session.h"
#include "include/infer_log.h"
namespace mindspore::inference {
std::shared_ptr<InferSession> InferSession::CreateSession(const std::string &device, uint32_t device_id) {
try {
auto session = std::make_shared<AclSession>();
auto ret = session->InitEnv(device, device_id);
if (ret != SUCCESS) {
return nullptr;
}
return session;
} catch (std::exception &e) {
MSI_LOG_ERROR << "Inference CreatSession failed";
return nullptr;
}
}
Status AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) {
Status ret = model_process_.LoadModelFromFile(file_name, model_id);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Load model from file failed, model file " << file_name;
return FAILED;
}
std::string dvpp_config_file;
auto index = file_name.rfind(".");
if (index == std::string::npos) {
dvpp_config_file = file_name;
} else {
dvpp_config_file = file_name.substr(0, index);
}
dvpp_config_file += "_dvpp_config.json";
std::ifstream fp(dvpp_config_file);
if (!fp.is_open()) {
MSI_LOG_INFO << "Dvpp config file not exist, model will execute with tensors as inputs, dvpp config file "
<< dvpp_config_file;
return SUCCESS;
}
fp.close();
if (dvpp_process_.InitWithJsonConfig(dvpp_config_file) != SUCCESS) {
MSI_LOG_ERROR << "Dvpp config file parse error, dvpp config file " << dvpp_config_file;
return FAILED;
}
execute_with_dvpp_ = true;
MSI_LOG_INFO << "Dvpp config success";
return SUCCESS;
}
Status AclSession::UnloadModel(uint32_t /*model_id*/) {
model_process_.UnLoad();
return SUCCESS;
}
Status AclSession::ExecuteModel(uint32_t /*model_id*/, const RequestBase &request,
ReplyBase &reply) { // set d context
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "set the ascend device context failed";
return FAILED;
}
return model_process_.Execute(request, reply);
}
Status AclSession::PreProcess(uint32_t /*model_id*/, const InferImagesBase *images_input,
ImagesDvppOutput &dvpp_output) {
if (images_input == nullptr) {
MSI_LOG_ERROR << "images input is nullptr";
return FAILED;
}
auto batch_size = images_input->batch_size();
if (batch_size <= 0) {
MSI_LOG_ERROR << "invalid batch size " << images_input->batch_size();
return FAILED;
}
std::vector<const void *> pic_buffer_list;
std::vector<size_t> pic_size_list;
for (size_t i = 0; i < batch_size; i++) {
const void *pic_buffer = nullptr;
uint32_t pic_size = 0;
if (!images_input->get(i, pic_buffer, pic_size) || pic_buffer == nullptr || pic_size == 0) {
MSI_LOG_ERROR << "Get request " << 0 << "th buffer failed";
return FAILED;
}
pic_buffer_list.push_back(pic_buffer);
pic_size_list.push_back(pic_size);
}
auto ret = dvpp_process_.Process(pic_buffer_list, pic_size_list, dvpp_output.buffer_device, dvpp_output.buffer_size);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "dvpp process failed";
return ret;
}
return SUCCESS;
}
Status AclSession::ExecuteModel(uint32_t model_id, const ImagesRequestBase &images_inputs, // images for preprocess
const RequestBase &request, ReplyBase &reply) {
if (!execute_with_dvpp_) {
MSI_LOG_ERROR << "Unexpected images as inputs, DVPP not config";
return INFER_STATUS(INVALID_INPUTS) << "Unexpected images as inputs, DVPP not config";
}
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "set the ascend device context failed";
return FAILED;
}
if (images_inputs.size() != 1) {
MSI_LOG_ERROR << "Only support one input to do DVPP preprocess";
return INFER_STATUS(INVALID_INPUTS) << "Only support one input to do DVPP preprocess";
}
if (images_inputs[0] == nullptr) {
MSI_LOG_ERROR << "Get first images input failed";
return FAILED;
}
if (images_inputs[0]->batch_size() != model_process_.GetBatchSize()) {
MSI_LOG_ERROR << "Input batch size " << images_inputs[0]->batch_size() << " not match Model batch size "
<< model_process_.GetBatchSize();
return INFER_STATUS(INVALID_INPUTS) << "Input batch size " << images_inputs[0]->batch_size()
<< " not match Model batch size " << model_process_.GetBatchSize();
}
if (request.size() != 0) {
MSI_LOG_ERROR << "only support one input, images input size is 1, tensor inputs is not 0 " << request.size();
return INFER_STATUS(INVALID_INPUTS) << "only support one input, images input size is 1, tensor inputs is not 0 "
<< request.size();
}
ImagesDvppOutput dvpp_output;
Status ret = PreProcess(model_id, images_inputs[0], dvpp_output);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "DVPP preprocess failed";
return ret;
}
ret = model_process_.Execute(dvpp_output.buffer_device, dvpp_output.buffer_size, reply);
if (ret != SUCCESS) {
MSI_LOG_ERROR << "Execute model failed";
return ret;
}
return SUCCESS;
}
Status AclSession::InitEnv(const std::string &device_type, uint32_t device_id) {
device_type_ = device_type;
device_id_ = device_id;
auto ret = aclInit(nullptr);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Execute aclInit Failed";
return FAILED;
}
MSI_LOG_INFO << "acl init success";
ret = aclrtSetDevice(device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl open device " << device_id_ << " failed";
return FAILED;
}
MSI_LOG_INFO << "open device " << device_id_ << " success";
ret = aclrtCreateContext(&context_, device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl create context failed";
return FAILED;
}
MSI_LOG_INFO << "create context success";
ret = aclrtCreateStream(&stream_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl create stream failed";
return FAILED;
}
MSI_LOG_INFO << "create stream success";
aclrtRunMode run_mode;
ret = aclrtGetRunMode(&run_mode);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl get run mode failed";
return FAILED;
}
bool is_device = (run_mode == ACL_DEVICE);
model_process_.SetIsDevice(is_device);
MSI_LOG_INFO << "get run mode success is device input/output " << is_device;
if (dvpp_process_.InitResource(stream_) != SUCCESS) {
MSI_LOG_ERROR << "dvpp init resource failed";
return FAILED;
}
MSI_LOG_INFO << "Init acl success, device id " << device_id_;
return SUCCESS;
}
Status AclSession::FinalizeEnv() {
dvpp_process_.Finalize();
aclError ret;
if (stream_ != nullptr) {
ret = aclrtDestroyStream(stream_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "destroy stream failed";
}
stream_ = nullptr;
}
MSI_LOG_INFO << "end to destroy stream";
if (context_ != nullptr) {
ret = aclrtDestroyContext(context_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "destroy context failed";
}
context_ = nullptr;
}
MSI_LOG_INFO << "end to destroy context";
ret = aclrtResetDevice(device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "reset devie " << device_id_ << " failed";
}
MSI_LOG_INFO << "end to reset device " << device_id_;
ret = aclFinalize();
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "finalize acl failed";
}
MSI_LOG_INFO << "end to finalize acl";
return SUCCESS;
}
AclSession::AclSession() = default;
} // namespace mindspore::inference

@ -1,57 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_SERVING_ACL_SESSION_H
#define MINDSPORE_SERVING_ACL_SESSION_H
#include <vector>
#include <string>
#include <unordered_map>
#include <utility>
#include <memory>
#include <map>
#include "include/inference.h"
#include "serving/acl/model_process.h"
#include "serving/acl/dvpp_process.h"
namespace mindspore {
namespace inference {
class AclSession : public InferSession {
public:
AclSession();
Status InitEnv(const std::string &device_type, uint32_t device_id) override;
Status FinalizeEnv() override;
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
Status UnloadModel(uint32_t model_id) override;
Status ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override;
Status ExecuteModel(uint32_t model_id, const ImagesRequestBase &images_inputs, // images for preprocess
const RequestBase &request, ReplyBase &reply) override;
private:
std::string device_type_;
int32_t device_id_;
aclrtStream stream_ = nullptr;
aclrtContext context_ = nullptr;
ModelProcess model_process_;
bool execute_with_dvpp_ = false;
DvppProcess dvpp_process_;
Status PreProcess(uint32_t model_id, const InferImagesBase *images_input, ImagesDvppOutput &dvpp_output);
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_SERVING_ACL_SESSION_H

File diff suppressed because it is too large

@ -1,159 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_DVPP_PROCESS_ACL
#define INC_DVPP_PROCESS_ACL
#include <vector>
#include <string>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "acl/ops/acl_dvpp.h"
#include "include/inference.h"
namespace mindspore::inference {
struct DvppDecodePara {
acldvppPixelFormat pixel_format = PIXEL_FORMAT_YUV_SEMIPLANAR_420;
};
struct DvppResizePara {
uint32_t output_width = 0;
uint32_t output_height = 0;
};
enum DvppCropType {
// crop left,top,right,bottom is given in config
kDvppCropTypeOffset = 0,
// crop left,top,right,bottom is calculated by image width/height and output crop width/height
kDvppCropTypeCentre = 1,
};
struct DvppRoiArea {
uint32_t left = 0;
uint32_t top = 0;
uint32_t right = 0;
uint32_t bottom = 0;
};
struct DvppCropInfo {
DvppCropType crop_type = kDvppCropTypeOffset;
DvppRoiArea crop_area; // when kDvppCropTypeOffset
uint32_t crop_width = 0; // when kDvppCropTypeCentre
uint32_t crop_height = 0; // when kDvppCropTypeCentre
};
struct DvppCropPara {
DvppCropInfo crop_info;
uint32_t output_width = 0;
uint32_t output_height = 0;
};
struct DvppCropAndPastePara {
DvppCropInfo crop_info;
DvppRoiArea paste_area;
uint32_t output_width = 0;
uint32_t output_height = 0;
};
class DvppProcess {
public:
DvppProcess();
~DvppProcess();
Status InitResource(aclrtStream stream);
void Finalize();
Status InitJpegDecodePara(const DvppDecodePara &decode_para); // jpeg decode + (resize | crop)
Status InitResizePara(const DvppResizePara &resize_para); // jpeg decode + resize
Status InitCropPara(const DvppCropPara &crop_para); // jpeg decode + crop
Status InitCropAndPastePara(const DvppCropAndPastePara &crop_and_paste_para); // jpeg decode + crop&paste
Status InitWithJsonConfig(const std::string &json_config);
// output device buffer will be destroy by DvppProcess itself.
Status Process(const void *pic_buffer, size_t pic_buffer_size, void *&output_device_buffer, size_t &output_size);
Status Process(const std::vector<const void *> &pic_buffer_list, const std::vector<size_t> &pic_buffer_size_list,
void *&output_device_buffer, size_t &output_size);
private:
uint32_t pic_width_ = 0;
uint32_t pic_height_ = 0;
DvppDecodePara decode_para_;
DvppResizePara resize_para_;
DvppCropPara crop_para_;
DvppCropAndPastePara crop_and_paste_para_;
// only one of the resize or crop flag can be true
bool to_resize_flag_ = false;
bool to_crop_flag_ = false;
bool to_crop_and_paste_flag_ = false;
void *input_pic_dev_buffer_ = nullptr;
uint32_t input_pic_buffer_size_ = 0;
uint32_t decode_output_buffer_size_ = 0;
void *decode_output_buffer_dev_ = nullptr;
acldvppPicDesc *decode_output_desc_ = nullptr;
acldvppResizeConfig *resize_config_ = nullptr;
acldvppRoiConfig *crop_area_ = nullptr;
acldvppRoiConfig *paste_area_ = nullptr;
acldvppPicDesc *vpc_output_desc_ = nullptr;
void *vpc_output_buffer_dev_ = nullptr; // vpc_output_buffer_size_ length
uint32_t vpc_output_buffer_size_ = 0;
void *batch_vpc_output_buffer_dev_ = nullptr; // batch_size_ * vpc_output_buffer_size_ length
uint32_t batch_size_ = 0;
aclrtStream stream_ = nullptr;
acldvppChannelDesc *dvpp_channel_desc_ = nullptr;
uint32_t AlignmentHelper(uint32_t org_size, uint32_t alignment) const;
uint32_t GetImageBufferSize(uint32_t stride_width, uint32_t stride_height, acldvppPixelFormat pixel_format) const;
Status GetPicDescStride(uint32_t width, uint32_t height, uint32_t &stride_width, uint32_t &stride_height);
Status GetPicDescStrideDecode(uint32_t width, uint32_t height, uint32_t &stride_width, uint32_t &stride_height);
Status InputInputBuffer(const void *pic_buffer, size_t pic_buffer_size);
Status InitDecodeOutputDesc(uint32_t image_width,
uint32_t image_height); // decode_output_desc_, decode_output_buffer_dev_
Status CheckRoiAreaWidthHeight(uint32_t width, uint32_t height);
Status CheckAndAdjustRoiArea(DvppRoiArea &area);
Status UpdateCropArea(uint32_t image_width, uint32_t image_height);
Status CheckResizeImageInfo(uint32_t image_width, uint32_t image_height) const;
void DestroyDecodeDesc();
Status InitVpcOutputDesc(uint32_t output_width, uint32_t output_height,
acldvppPixelFormat pixel_format); // vpc_output_desc_, vpc_output_buffer_dev_batch_
Status InitRoiAreaConfig(acldvppRoiConfig *&roi_area, const DvppRoiArea &init_para);
Status InitCommonCropPara(DvppCropInfo &crop_info, uint32_t out_width, uint32_t out_height);
Status InitResizeOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, resize_config
Status InitCropOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, crop_area_
Status InitCropAndPasteOutputDesc(); // vpc_output_desc_, vpc_output_buffer_dev_, crop_area_, paste_area_
void DestroyVpcOutputDesc();
Status ProcessDecode();
Status ProcessResize();
Status ProcessCrop();
Status ProcessCropAndPaste();
void DestroyResource();
Status GetJpegWidthHeight(const void *pic_buffer, size_t pic_buffer_size, uint32_t &image_width,
uint32_t &image_height);
};
} // namespace mindspore::inference
#endif // INC_DVPP_PROCESS_ACL

@ -1,68 +0,0 @@
{
"preprocess": [
{
"input": {
"index": 0
},
"decode_para": {
"out_pixel_format": "YUV420SP"
},
"dvpp_process": {
"op_name": "resize",
"out_width": 224,
"out_height": 224
},
"sample of dvpp_process content": [
{
"op_name": "resize",
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop",
"crop_type": "offset",
"crop_left": 10,
"crop_top": 10,
"crop_right": 100,
"crop_bottom": 200,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop",
"crop_type": "centre",
"crop_width": 100,
"crop_height": 100,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop_and_paste",
"crop_type": "offset",
"crop_left": 10,
"crop_top": 10,
"crop_right": 100,
"crop_bottom": 200,
"paste_left": 10,
"paste_top": 10,
"paste_right": 100,
"paste_bottom": 200,
"out_width": 224,
"out_height": 224
},
{
"op_name": "crop_and_paste",
"crop_type": "centre",
"crop_width": 100,
"crop_height": 100,
"paste_left": 10,
"paste_top": 10,
"paste_right": 100,
"paste_bottom": 200,
"out_width": 224,
"out_height": 224
}
]
}
]
}

File diff suppressed because it is too large

@ -1,83 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_MODEL_PROCESS_ACL
#define INC_MODEL_PROCESS_ACL
#include <vector>
#include <string>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "include/inference.h"
namespace mindspore {
namespace inference {
struct AclTensorInfo {
void *device_data;
size_t buffer_size;
aclDataType data_type;
std::vector<int64_t> dims;
};
struct ImagesDvppOutput {
void *buffer_device = nullptr;
size_t buffer_size = 0;
size_t input_index = 0;
};
class ModelProcess {
public:
ModelProcess() {}
~ModelProcess() {}
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id);
void UnLoad();
// override this method to avoid request/reply data copy
Status Execute(const RequestBase &request, ReplyBase &reply);
Status Execute(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size, ReplyBase &reply);
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
size_t GetBatchSize() const;
private:
uint32_t model_id_ = 0xffffffff;
// if run one device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
bool is_run_on_device_ = false;
aclmdlDesc *model_desc_ = nullptr;
aclmdlDataset *inputs_ = nullptr;
aclmdlDataset *outputs_ = nullptr;
std::vector<AclTensorInfo> input_infos_;
std::vector<AclTensorInfo> output_infos_;
Status PreInitModelResource();
Status CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
Status CheckAndInitInput(const RequestBase &request);
Status CheckAndInitDvppInput(const void *dvpp_outputs_buffer_dev, size_t dvpp_outputs_buffer_size,
size_t input_index);
Status BuildOutputs(ReplyBase &reply);
Status InitInputsBuffer();
Status InitOutputsBuffer();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
void DestroyOutputsBuffer();
};
} // namespace inference
} // namespace mindspore
#endif

File diff suppressed because it is too large

@ -1,29 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_SERVING_HTTP_PROCESS_H
#define MINDSPORE_SERVING_HTTP_PROCESS_H
#include <evhttp.h>
#include <event.h>
#include <event2/http.h>
#include <event2/http_struct.h>
namespace mindspore {
namespace serving {
void http_handler_msg(struct evhttp_request *req, void *arg);
} // namespace serving
} // namespace mindspore
#endif // MINDSPORE_SERVING_HTTP_PROCESS_H

File diff suppressed because it is too large

@ -1,30 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_SERVER_H
#define MINDSPORE_SERVER_H
#include "util/status.h"
namespace mindspore {
namespace serving {
class Server {
public:
Server() = default;
~Server() = default;
Status BuildAndStart();
};
} // namespace serving
} // namespace mindspore
#endif // MINDSPORE_SERVER_H

@ -1,194 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "core/serving_tensor.h"
#include <vector>
#include <unordered_map>
#include <string>
#include <algorithm>
#include "include/infer_log.h"
using std::string;
using std::unordered_map;
using std::vector;
namespace mindspore {
namespace serving {
using inference::DataType;
using inference::InferTensorBase;
const size_t kMaxShapeElementCount = INT32_MAX;
const size_t kMaxDataBufferSize = UINT32_MAX;
ServingTensor::ServingTensor(ms_serving::Tensor &other) : tensor_(other) {}
ServingTensor::~ServingTensor() {}
DataType ServingTensor::data_type() const {
const std::unordered_map<ms_serving::DataType, inference::DataType> type2id_map{
{ms_serving::MS_UNKNOWN, inference::kMSI_Unknown}, {ms_serving::MS_BOOL, inference::kMSI_Bool},
{ms_serving::MS_INT8, inference::kMSI_Int8}, {ms_serving::MS_UINT8, inference::kMSI_Uint8},
{ms_serving::MS_INT16, inference::kMSI_Int16}, {ms_serving::MS_UINT16, inference::kMSI_Uint16},
{ms_serving::MS_INT32, inference::kMSI_Int32}, {ms_serving::MS_UINT32, inference::kMSI_Uint32},
{ms_serving::MS_INT64, inference::kMSI_Int64}, {ms_serving::MS_UINT64, inference::kMSI_Uint64},
{ms_serving::MS_FLOAT16, inference::kMSI_Float16}, {ms_serving::MS_FLOAT32, inference::kMSI_Float32},
{ms_serving::MS_FLOAT64, inference::kMSI_Float64},
};
auto it = type2id_map.find(tensor_.tensor_type());
if (it == type2id_map.end()) {
MSI_LOG_WARNING << "failed to get data type, undefined data type " << tensor_.tensor_type();
return inference::kMSI_Unknown;
} else {
return it->second;
}
}
void ServingTensor::set_data_type(DataType data_type) {
const std::unordered_map<inference::DataType, ms_serving::DataType> id2type_map{
{inference::kMSI_Unknown, ms_serving::MS_UNKNOWN}, {inference::kMSI_Bool, ms_serving::MS_BOOL},
{inference::kMSI_Float64, ms_serving::MS_FLOAT64}, {inference::kMSI_Int8, ms_serving::MS_INT8},
{inference::kMSI_Uint8, ms_serving::MS_UINT8}, {inference::kMSI_Int16, ms_serving::MS_INT16},
{inference::kMSI_Uint16, ms_serving::MS_UINT16}, {inference::kMSI_Int32, ms_serving::MS_INT32},
{inference::kMSI_Uint32, ms_serving::MS_UINT32}, {inference::kMSI_Int64, ms_serving::MS_INT64},
{inference::kMSI_Uint64, ms_serving::MS_UINT64}, {inference::kMSI_Float16, ms_serving::MS_FLOAT16},
{inference::kMSI_Float32, ms_serving::MS_FLOAT32},
};
auto it = id2type_map.find(data_type);
if (it == id2type_map.end()) {
MSI_LOG_WARNING << "failed to set data type, undefined data type " << data_type;
tensor_.set_tensor_type(ms_serving::MS_UNKNOWN);
} else {
tensor_.set_tensor_type(it->second);
}
}
std::vector<int64_t> ServingTensor::shape() const {
std::vector<int64_t> result;
auto dims = tensor_.tensor_shape().dims();
std::transform(dims.begin(), dims.end(), std::back_inserter(result), [](const int64_t dim) { return dim; });
return result;
}
void ServingTensor::set_shape(const std::vector<int64_t> &shape) {
auto tensor_shape = tensor_.mutable_tensor_shape();
tensor_shape->Clear();
size_t element_count = 1;
for (auto dim : shape) {
if (dim <= 0 || element_count > kMaxShapeElementCount / dim) {
MSI_LOG_ERROR << "failed to set shape, invalid dim num " << dim;
tensor_shape->Clear();
return;
}
element_count *= dim;
tensor_shape->add_dims(dim);
}
}
bool ServingTensor::resize_data(size_t data_len) {
string *buffer = tensor_.mutable_data();
if (buffer == nullptr) {
MSI_LOG_ERROR << "invalid buffer data";
return false;
}
buffer->resize(data_len);
return true;
}
size_t ServingTensor::data_size() const { return tensor_.data().size(); }
void *ServingTensor::mutable_data() { return const_cast<char *>(tensor_.mutable_data()->data()); }
const void *ServingTensor::data() const { return tensor_.data().data(); }
ServingRequest::ServingRequest(const ms_serving::PredictRequest &request) : request_(request) {
auto &data = request_.data();
std::transform(data.begin(), data.end(), std::back_inserter(cache_),
[](const ms_serving::Tensor &item) { return ServingTensor(const_cast<ms_serving::Tensor &>(item)); });
}
size_t ServingRequest::size() const { return cache_.size(); }
const InferTensorBase *ServingRequest::operator[](size_t index) const {
if (index >= cache_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size();
return nullptr;
}
return &(cache_[index]);
}
ServingImages::ServingImages(const ms_serving::Images &images) : images_(images) {}
size_t ServingImages::batch_size() const { return images_.images_size(); }
bool ServingImages::get(size_t index, const void *&pic_buffer, uint32_t &pic_size) const {
if (index >= static_cast<size_t>(images_.images_size())) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << images_.images_size();
return false;
}
pic_buffer = images_.images(index).data();
pic_size = images_.images(index).size();
return true;
}
size_t ServingImages::input_index() const { return static_cast<size_t>(images_.input_index()); }
size_t ServingReply::size() const { return cache_.size(); }
InferTensorBase *ServingReply::operator[](size_t index) {
if (index >= cache_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size();
return nullptr;
}
return &(cache_[index]);
}
const InferTensorBase *ServingReply::operator[](size_t index) const {
if (index >= cache_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size();
return nullptr;
}
return &(cache_[index]);
}
InferTensorBase *ServingReply::add() {
auto new_item = reply_.add_result();
if (new_item == nullptr) {
MSI_LOG_ERROR << "add new item failed, current total size " << cache_.size();
return nullptr;
}
cache_.push_back(ServingTensor(*new_item));
return &(cache_.back());
}
void ServingReply::clear() { reply_.mutable_result()->Clear(); }
ServingImagesRequest::ServingImagesRequest(const ms_serving::PredictRequest &request) : request_(request) {
auto &images_inputs = request_.images();
std::transform(images_inputs.begin(), images_inputs.end(), std::back_inserter(cache_),
[](const ms_serving::Images &item) { return ServingImages(const_cast<ms_serving::Images &>(item)); });
}
size_t ServingImagesRequest::size() const { return cache_.size(); }
const inference::InferImagesBase *ServingImagesRequest::operator[](size_t index) const {
if (index >= cache_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << cache_.size();
return nullptr;
}
return &(cache_[index]);
}
} // namespace serving
} // namespace mindspore

@ -1,105 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_SERVING_TENSOR_H_
#define MINDSPORE_SERVING_TENSOR_H_
#include <utility>
#include <vector>
#include <memory>
#include "include/infer_tensor.h"
#include "serving/ms_service.pb.h"
namespace mindspore {
namespace serving {
class MS_API ServingTensor : public inference::InferTensorBase {
public:
// the other's lifetime must longer than this object
explicit ServingTensor(ms_serving::Tensor &other);
~ServingTensor();
inference::DataType data_type() const override;
void set_data_type(inference::DataType type) override;
std::vector<int64_t> shape() const override;
void set_shape(const std::vector<int64_t> &shape) override;
const void *data() const override;
size_t data_size() const override;
bool resize_data(size_t data_len) override;
void *mutable_data() override;
private:
// if tensor_ is reference from other ms_serving::Tensor, the other's lifetime must
// longer than this object
ms_serving::Tensor &tensor_;
};
class ServingImages : public inference::InferImagesBase {
public:
explicit ServingImages(const ms_serving::Images &images);
~ServingImages() = default;
size_t batch_size() const override;
bool get(size_t index, const void *&pic_buffer, uint32_t &pic_size) const override;
size_t input_index() const override;
private:
const ms_serving::Images &images_;
};
class ServingRequest : public inference::RequestBase {
public:
explicit ServingRequest(const ms_serving::PredictRequest &request);
~ServingRequest() = default;
size_t size() const override;
const inference::InferTensorBase *operator[](size_t index) const override;
private:
const ms_serving::PredictRequest &request_;
std::vector<ServingTensor> cache_;
};
class ServingReply : public inference::ReplyBase {
public:
explicit ServingReply(ms_serving::PredictReply &reply) : reply_(reply) {}
~ServingReply() = default;
size_t size() const override;
inference::InferTensorBase *operator[](size_t index) override;
const inference::InferTensorBase *operator[](size_t index) const override;
inference::InferTensorBase *add() override;
void clear() override;
private:
ms_serving::PredictReply &reply_;
std::vector<ServingTensor> cache_;
};
class ServingImagesRequest : public inference::ImagesRequestBase {
public:
explicit ServingImagesRequest(const ms_serving::PredictRequest &request);
~ServingImagesRequest() = default;
size_t size() const override;
const inference::InferImagesBase *operator[](size_t index) const override;
private:
const ms_serving::PredictRequest &request_;
std::vector<ServingImages> cache_;
};
} // namespace serving
} // namespace mindspore
#endif // MINDSPORE_SERVING_TENSOR_H_

@ -1,154 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "core/session.h"
#include <grpcpp/grpcpp.h>
#include <string>
#include <map>
#include <vector>
#include <utility>
#include <memory>
#include <chrono>
#include "include/infer_log.h"
#include "serving/ms_service.grpc.pb.h"
#include "core/util/option_parser.h"
#include "core/version_control/version_controller.h"
#include "core/util/file_system_operation.h"
#include "core/serving_tensor.h"
using ms_serving::MSService;
using ms_serving::PredictReply;
using ms_serving::PredictRequest;
namespace mindspore {
namespace serving {
Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
session_ = inference::InferSession::CreateSession(device, device_id);
if (session_ == nullptr) {
MSI_LOG(ERROR) << "Creat Session Failed";
return FAILED;
}
device_type_ = device;
return SUCCESS;
}
Session &Session::Instance() {
static Session instance;
return instance;
}
Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
try {
auto status = PredictInner(request, reply);
return status;
} catch (const std::bad_alloc &ex) {
MSI_LOG(ERROR) << "Serving Error: malloc memory failed";
std::cout << "Serving Error: malloc memory failed" << std::endl;
} catch (const std::runtime_error &ex) {
MSI_LOG(ERROR) << "Serving Error: runtime error occurred: " << ex.what();
std::cout << "Serving Error: runtime error occurred: " << ex.what() << std::endl;
} catch (const std::exception &ex) {
MSI_LOG(ERROR) << "Serving Error: exception occurred: " << ex.what();
std::cout << "Serving Error: exception occurred: " << ex.what() << std::endl;
} catch (...) {
MSI_LOG(ERROR) << "Serving Error: exception occurred";
std::cout << "Serving Error: exception occurred";
}
return FAILED;
}
Status Session::PredictInner(const PredictRequest &request, PredictReply &reply) {
if (!model_loaded_) {
MSI_LOG(ERROR) << "the model has not loaded";
return FAILED;
}
if (session_ == nullptr) {
MSI_LOG(ERROR) << "the inference session has not be initialized";
return FAILED;
}
std::lock_guard<std::mutex> lock(mutex_);
MSI_LOG(INFO) << "run Predict";
if (request.images_size() > 0) {
ServingImagesRequest serving_images(request);
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "execute model with images return failed";
return ret;
}
} else if (request.data_size() > 0) {
ServingRequest serving_request(request);
ServingReply serving_reply(reply);
Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "execute model with datas return failed";
return ret;
}
}
MSI_LOG(INFO) << "run Predict finished";
return SUCCESS;
}
Status Session::Warmup(const MindSporeModelPtr model) {
if (session_ == nullptr) {
MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup";
return FAILED;
}
std::lock_guard<std::mutex> lock(mutex_);
std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
model_loaded_ = false;
MSI_TIME_STAMP_START(LoadModelFromFile)
auto ret = session_->LoadModelFromFile(file_name, graph_id_);
MSI_TIME_STAMP_END(LoadModelFromFile)
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
return ret;
}
model_loaded_ = true;
MSI_LOG(INFO) << "Session Warmup finished";
return SUCCESS;
}
Status Session::Clear() {
if (session_ != nullptr) {
session_->UnloadModel(graph_id_);
session_->FinalizeEnv();
session_ = nullptr;
}
return SUCCESS;
}
Status Session::GetModelInputsInfo(std::vector<inference::InferTensor> &tensor_list) {
if (!model_loaded_) {
MSI_LOG(ERROR) << "the model has not loaded";
return FAILED;
}
if (session_ == nullptr) {
MSI_LOG(ERROR) << "the inference session has not be initialized";
return FAILED;
}
std::lock_guard<std::mutex> lock(mutex_);
Status ret = session_->GetModelInputsInfo(graph_id_, &tensor_list);
if (ret != SUCCESS) {
MSI_LOG(ERROR) << "get model inputs info failed";
}
return ret;
}
} // namespace serving
} // namespace mindspore

Some files were not shown because too many files have changed in this diff
