serving add acl support, extract common inference interface

pull/3219/head
xuyongfei 5 years ago
parent bfc18704d5
commit 314208633b

@ -51,6 +51,8 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/flatbuffers/include/flatbuffers)
if (NOT ENABLE_ACL)
include(${CMAKE_SOURCE_DIR}/cmake/dependency_utils.cmake)
find_package(Python3 3.7 COMPONENTS Interpreter Development)
if(Python3_FOUND)
@ -100,8 +102,12 @@ if (ENABLE_TESTCASES)
add_subdirectory(tests)
endif()
endif() # NOT ENABLE_ACL
if (ENABLE_SERVING)
add_subdirectory(serving)
endif()
if (NOT ENABLE_ACL)
include(cmake/package.cmake)
endif() # NOT ENABLE_ACL

@ -25,7 +25,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-w on|off] [-E] [-l on|off]"
echo ""
echo "Options:"
echo " -d Debug mode"
@ -54,6 +54,7 @@ usage()
echo " -I Compile predict, default off"
echo " -K Compile with AKG, default on"
echo " -s Enable serving module, default off"
echo " -w Enable acl module, default off"
echo " -B Enable debugger, default off"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
@ -97,12 +98,13 @@ checkopts()
PREDICT_PLATFORM=""
ENABLE_AKG="on"
ENABLE_SERVING="off"
ENABLE_ACL="off"
ENABLE_DEBUGGER="off"
ENABLE_IBVERBS="off"
ENABLE_PYTHON="on"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:swB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@ -256,6 +258,10 @@ checkopts()
ENABLE_SERVING="on"
echo "enable serving"
;;
w)
ENABLE_ACL="on"
echo "enable acl"
;;
B)
check_on_off $OPTARG B
ENABLE_DEBUGGER="on"
@ -348,6 +354,9 @@ build_mindspore()
if [[ "X$ENABLE_SERVING" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SERVING=ON"
fi
if [[ "X$ENABLE_ACL" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ACL=ON"
fi
if [[ "X$ENABLE_DEBUGGER" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DEBUGGER=ON"
fi
@ -362,7 +371,11 @@ build_mindspore()
if [[ -n "$VERBOSE" ]]; then
CMAKE_VERBOSE="--verbose"
fi
if [[ "X$ENABLE_ACL" = "Xon" ]]; then
cmake --build . ${CMAKE_VERBOSE} -j$THREAD_NUM
else
cmake --build . --target package ${CMAKE_VERBOSE} -j$THREAD_NUM
fi
echo "success to build mindspore project!"
}

@ -0,0 +1,107 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INFERENCE_LOG_H_
#define MINDSPORE_INFERENCE_LOG_H_
#include <stdarg.h>
#include <stdint.h>
#include <string>
#include <sstream>
#include <memory>
#include <iostream>
#ifndef ENABLE_ACL
#include "mindspore/ccsrc/utils/log_adapter.h"
namespace mindspore::inference {
#define MSI_LOG(level) MS_LOG(level)
#define MSI_LOG_DEBUG MSI_LOG(DEBUG)
#define MSI_LOG_INFO MSI_LOG(INFO)
#define MSI_LOG_WARNING MSI_LOG(WARNING)
#define MSI_LOG_ERROR MSI_LOG(ERROR)
#define MSI_ASSERT(item) MS_ASSERT(item)
} // namespace mindspore::inference
#else // ENABLE_ACL
#include "acl/acl.h"
namespace mindspore::inference {
class LogStream {
public:
LogStream() { sstream_ = std::make_shared<std::stringstream>(); }
~LogStream() = default;
template <typename T>
LogStream &operator<<(const T &val) noexcept {
(*sstream_) << val;
return *this;
}
LogStream &operator<<(std::ostream &func(std::ostream &os)) noexcept {
(*sstream_) << func;
return *this;
}
friend class LogWriter;
private:
std::shared_ptr<std::stringstream> sstream_;
};
template <class T, typename std::enable_if<std::is_enum<T>::value, int>::type = 0>
constexpr std::ostream &operator<<(std::ostream &stream, const T &value) {
return stream << static_cast<typename std::underlying_type<T>::type>(value);
}
class LogWriter {
public:
LogWriter(const char *file, int line, const char *func, aclLogLevel log_level)
: file_(file), line_(line), func_(func), log_level_(log_level) {}
~LogWriter() = default;
void operator<(const LogStream &stream) const noexcept __attribute__((visibility("default"))) {
std::ostringstream msg;
msg << stream.sstream_->rdbuf();
OutputLog(msg);
}
private:
void OutputLog(const std::ostringstream &msg) const { aclAppLog(log_level_, func_, file_, line_, msg.str().c_str()); }
const char *file_;
int line_;
const char *func_;
aclLogLevel log_level_;
};
#define MSILOG_IF(level) inference::LogWriter(__FILE__, __LINE__, __FUNCTION__, ACL_##level) < inference::LogStream()
#define MSI_LOG(level) MSI_LOG_##level
#define MSI_LOG_DEBUG MSILOG_IF(DEBUG)
#define MSI_LOG_INFO MSILOG_IF(INFO)
#define MSI_LOG_WARNING MSILOG_IF(WARNING)
#define MSI_LOG_ERROR MSILOG_IF(ERROR)
#define MSI_ASSERT(item)
} // namespace mindspore::inference
#endif // ENABLE_ACL
#endif // MINDSPORE_INFERENCE_LOG_H_
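The macros above give serving code a single logging front end on both builds. A minimal usage sketch follows, with a hypothetical helper CheckModelId for illustration; the stream syntax is identical whether MSI_LOG expands to MS_LOG or to the ACL LogWriter path:
#include "include/infer_log.h"

namespace mindspore::inference {
// Hypothetical helper, illustration only: MSI_LOG_INFO and MSI_LOG(ERROR)
// both resolve to the backend selected by ENABLE_ACL at compile time.
bool CheckModelId(uint32_t model_id) {
  MSI_LOG_INFO << "checking model id " << model_id;
  if (model_id == 0xffffffff) {
    MSI_LOG(ERROR) << "invalid model id " << model_id;
    return false;
  }
  return true;
}
}  // namespace mindspore::inference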

@ -0,0 +1,191 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_INFER_TENSOR_H_
#define MINDSPORE_INCLUDE_INFER_TENSOR_H_
#include <utility>
#include <vector>
#include <memory>
#include <numeric>
#include <map>
#include <functional>
#include "securec/include/securec.h"
#include "include/infer_log.h"
namespace mindspore {
#define MS_API __attribute__((visibility("default")))
namespace inference {
enum DataType {
kMSI_Unknown = 0,
kMSI_Bool = 1,
kMSI_Int8 = 2,
kMSI_Int16 = 3,
kMSI_Int32 = 4,
kMSI_Int64 = 5,
kMSI_Uint8 = 6,
kMSI_Uint16 = 7,
kMSI_Uint32 = 8,
kMSI_Uint64 = 9,
kMSI_Float16 = 10,
kMSI_Float32 = 11,
kMSI_Float64 = 12,
};
class InferTensorBase {
public:
InferTensorBase() = default;
virtual ~InferTensorBase() = default;
virtual DataType data_type() const = 0;
virtual void set_data_type(DataType type) = 0;
virtual std::vector<int64_t> shape() const = 0;
virtual void set_shape(const std::vector<int64_t> &shape) = 0;
virtual const void *data() const = 0;
virtual size_t data_size() const = 0;
virtual bool resize_data(size_t data_len) = 0;
virtual void *mutable_data() = 0;
bool set_data(const void *data, size_t data_len) {
resize_data(data_len);
if (mutable_data() == nullptr) {
MSI_LOG_ERROR << "set data failed, data len " << data_len;
return false;
}
if (data_size() != data_len) {
MSI_LOG_ERROR << "set data failed, tensor current data size " << data_size() << " not match data len "
<< data_len;
return false;
}
if (data_len == 0) {
return true;
}
memcpy_s(mutable_data(), data_size(), data, data_len);
return true;
}
int64_t ElementNum() const {
std::vector<int64_t> shapex = shape();
return std::accumulate(shapex.begin(), shapex.end(), 1LL, std::multiplies<int64_t>());
}
int GetTypeSize(DataType type) const {
const std::map<DataType, size_t> type_size_map{
{kMSI_Bool, sizeof(bool)}, {kMSI_Float64, sizeof(double)}, {kMSI_Int8, sizeof(int8_t)},
{kMSI_Uint8, sizeof(uint8_t)}, {kMSI_Int16, sizeof(int16_t)}, {kMSI_Uint16, sizeof(uint16_t)},
{kMSI_Int32, sizeof(int32_t)}, {kMSI_Uint32, sizeof(uint32_t)}, {kMSI_Int64, sizeof(int64_t)},
{kMSI_Uint64, sizeof(uint64_t)}, {kMSI_Float16, sizeof(uint16_t)}, {kMSI_Float32, sizeof(float)},
};
auto it = type_size_map.find(type);
if (it != type_size_map.end()) {
return it->second;
}
return 0;
}
};
class InferTensor : public InferTensorBase {
public:
DataType type_;
std::vector<int64_t> shape_;
std::vector<uint8_t> data_;
public:
InferTensor() = default;
InferTensor(DataType type, std::vector<int64_t> shape, const void *data, size_t data_len) {
set_data_type(type);
set_shape(shape);
set_data(data, data_len);
}
void set_data_type(DataType type) override { type_ = type; }
DataType data_type() const override { return type_; }
void set_shape(const std::vector<int64_t> &shape) override { shape_ = shape; }
std::vector<int64_t> shape() const override { return shape_; }
const void *data() const override { return data_.data(); }
size_t data_size() const override { return data_.size(); }
bool resize_data(size_t data_len) override {
data_.resize(data_len);
return true;
}
void *mutable_data() override { return data_.data(); }
};
class RequestBase {
public:
virtual size_t size() const = 0;
virtual const InferTensorBase *operator[](size_t index) const = 0;
};
class ReplyBase {
public:
virtual size_t size() const = 0;
virtual InferTensorBase *operator[](size_t index) = 0;
virtual const InferTensorBase *operator[](size_t index) const = 0;
virtual InferTensorBase *add() = 0;
virtual void clear() = 0;
};
class VectorInferTensorWrapReply : public ReplyBase {
public:
explicit VectorInferTensorWrapReply(std::vector<InferTensor> &tensor_list) : tensor_list_(tensor_list) {}
size_t size() const { return tensor_list_.size(); }
InferTensorBase *operator[](size_t index) {
if (index >= tensor_list_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size();
return nullptr;
}
return &(tensor_list_[index]);
}
const InferTensorBase *operator[](size_t index) const {
if (index >= tensor_list_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size();
return nullptr;
}
return &(tensor_list_[index]);
}
InferTensorBase *add() {
tensor_list_.push_back(InferTensor());
return &(tensor_list_.back());
}
void clear() { tensor_list_.clear(); }
std::vector<InferTensor> &tensor_list_;
};
class VectorInferTensorWrapRequest : public RequestBase {
public:
explicit VectorInferTensorWrapRequest(const std::vector<InferTensor> &tensor_list) : tensor_list_(tensor_list) {}
size_t size() const { return tensor_list_.size(); }
const InferTensorBase *operator[](size_t index) const {
if (index >= tensor_list_.size()) {
MSI_LOG_ERROR << "visit invalid index " << index << " total size " << tensor_list_.size();
return nullptr;
}
return &(tensor_list_[index]);
}
const std::vector<InferTensor> &tensor_list_;
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_INFER_TENSOR_H_
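A minimal sketch of how these types compose, using made-up values: InferTensor owns its bytes in a std::vector, and the Wrap classes expose plain vectors through the RequestBase/ReplyBase interfaces that ExecuteModel consumes.
#include <vector>
#include "include/infer_tensor.h"

namespace mindspore::inference {
// Illustration only: build a 2x2 float32 input and wrap request/reply vectors.
bool BuildRequestExample() {
  std::vector<float> values = {1.0f, 2.0f, 3.0f, 4.0f};
  InferTensor input(kMSI_Float32, {2, 2}, values.data(), values.size() * sizeof(float));
  std::vector<InferTensor> inputs{input};
  std::vector<InferTensor> outputs;
  VectorInferTensorWrapRequest request(inputs);
  VectorInferTensorWrapReply reply(outputs);
  return request.size() == 1 && request[0]->ElementNum() == 4;
}
}  // namespace mindspore::inference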

@ -20,28 +20,32 @@
#include <memory>
#include <vector>
#include <string>
#include "include/ms_tensor.h"
#include "include/infer_tensor.h"
namespace mindspore {
class FuncGraph;
namespace inference {
using VectorForMSTensorPtr = std::vector<std::shared_ptr<inference::MSTensor>>;
class MS_API MSSession {
public:
MSSession() = default;
static std::shared_ptr<MSSession> CreateSession(const std::string &device, uint32_t device_id);
virtual uint32_t CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr) = 0;
virtual MultiTensor RunGraph(uint32_t graph_id, const VectorForMSTensorPtr &inputs) = 0;
virtual bool CheckModelInputs(uint32_t graph_id, const VectorForMSTensorPtr &inputs) const = 0;
class MS_API InferSession {
public:
InferSession() = default;
virtual ~InferSession() = default;
virtual bool InitEnv(const std::string &device_type, uint32_t device_id) = 0;
virtual bool FinalizeEnv() = 0;
virtual bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) = 0;
virtual bool UnloadModel(uint32_t model_id) = 0;
// override this method to avoid request/reply data copy
virtual bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) = 0;
virtual bool ExecuteModel(uint32_t model_id, const std::vector<InferTensor> &inputs,
std::vector<InferTensor> &outputs) {
VectorInferTensorWrapRequest request(inputs);
VectorInferTensorWrapReply reply(outputs);
return ExecuteModel(model_id, request, reply);
}
static std::shared_ptr<InferSession> CreateSession(const std::string &device, uint32_t device_id);
};
std::shared_ptr<FuncGraph> MS_API LoadModel(const char *model_buf, size_t size, const std::string &device);
void MS_API ExitInference();
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_MS_SESSION_H
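For context, a hedged caller sketch of the new InferSession interface; the device string, device id, and model path are placeholders, and on the ACL build CreateSession already performs InitEnv internally (see acl_session.cc later in this commit), so only FinalizeEnv is called here:
#include <vector>
#include "include/inference.h"

int RunOnce(const std::vector<mindspore::inference::InferTensor> &inputs) {
  using mindspore::inference::InferSession;
  using mindspore::inference::InferTensor;
  auto session = InferSession::CreateSession("Ascend", 0);  // placeholder device / device_id
  if (session == nullptr) {
    return -1;
  }
  uint32_t model_id = 0;
  if (!session->LoadModelFromFile("/path/to/model", model_id)) {  // placeholder path
    session->FinalizeEnv();
    return -1;
  }
  std::vector<InferTensor> outputs;
  // The vector overload wraps inputs/outputs into RequestBase/ReplyBase internally.
  bool ok = session->ExecuteModel(model_id, inputs, outputs);
  session->UnloadModel(model_id);
  session->FinalizeEnv();
  return ok ? 0 : -1;
}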

@ -1,69 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_MS_TENSOR_H_
#define MINDSPORE_INCLUDE_MS_TENSOR_H_
#include <utility>
#include <vector>
#include <memory>
#include "mindspore/core/ir/dtype/type_id.h"
namespace mindspore {
#define MS_API __attribute__((visibility("default")))
namespace inference {
class MS_API MSTensor {
public:
MSTensor() = default;
// brief Create a MSTensor pointer.
//
// param data_type DataTypeId of tensor to be created.
// param shape Shape of tensor to be created.
// return MSTensor pointer.
static MSTensor *CreateTensor(TypeId data_type, const std::vector<int> &shape);
~MSTensor() = default;
virtual TypeId data_type() const = 0;
virtual TypeId set_data_type(const TypeId data_type) = 0;
virtual std::vector<int> shape() const = 0;
virtual size_t set_shape(const std::vector<int> &shape) = 0;
virtual int DimensionSize(size_t index) const = 0;
// brief Get number of element in MSTensor.
//
// return Number of element in MSTensor.
virtual int ElementsNum() const = 0;
virtual std::size_t hash() const = 0;
// brief Get byte size of data in MSTensor.
//
// return Byte size of data in MSTensor.
virtual size_t Size() const = 0;
// brief Get pointer of data in MSTensor.
//
// The data pointer can be used to both write or read data in MSTensor.
//
// return A pointer points to data in MSTensor.
virtual void *MutableData() const = 0;
};
using MultiTensor = std::vector<std::shared_ptr<inference::MSTensor>>;
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_MS_TENSOR_H_

@ -297,7 +297,7 @@ set(LOAD_ONNX_SRC
${CMAKE_CURRENT_SOURCE_DIR}/utils/load_onnx/anf_model_parser.cc
)
add_library(inference SHARED
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/session.cc
${CMAKE_CURRENT_SOURCE_DIR}/backend/session/infer_session.cc
${LOAD_ONNX_SRC}
)
target_link_libraries(inference PRIVATE ${PYTHON_LIBRARIES} ${SECUREC_LIBRARY}

@ -88,8 +88,7 @@ GraphId AscendInferenceSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) {
return graph_id;
}
bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id,
const std::vector<std::shared_ptr<inference::MSTensor> > &inputs) {
bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const {
MS_LOG(INFO) << "Start check client inputs, graph id : " << graph_id;
auto kernel_graph = GetGraph(graph_id);
MS_EXCEPTION_IF_NULL(kernel_graph);
@ -119,8 +118,7 @@ bool AscendInferenceSession::CheckModelInputs(uint32_t graph_id,
return true;
}
bool AscendInferenceSession::CompareInput(const std::shared_ptr<inference::MSTensor> &input,
const ParameterPtr &parameter) {
bool AscendInferenceSession::CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) const {
MS_EXCEPTION_IF_NULL(input);
MS_EXCEPTION_IF_NULL(parameter);
// compare dims
@ -155,7 +153,7 @@ bool AscendInferenceSession::CompareInput(const std::shared_ptr<inference::MSTen
return true;
}
std::string AscendInferenceSession::PrintInputShape(std::vector<size_t> shape) {
std::string AscendInferenceSession::PrintInputShape(std::vector<size_t> shape) const {
string res = "[";
for (auto dim : shape) {
res += " " + std::to_string(dim);

@ -39,9 +39,9 @@ class AscendInferenceSession : public AscendSession {
void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const;
GraphId CompileGraph(NotNull<FuncGraphPtr> func_graph) override;
bool CheckModelInputs(uint32_t graph_id, const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) override;
bool CompareInput(const std::shared_ptr<inference::MSTensor> &input, const ParameterPtr &parameter);
std::string PrintInputShape(std::vector<size_t> shape);
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const override;
bool CompareInput(const tensor::TensorPtr &input, const ParameterPtr &parameter) const;
std::string PrintInputShape(std::vector<size_t> shape) const;
};
MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession);
} // namespace session

File diff suppressed because it is too large.

@ -0,0 +1,66 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_SESSION_SESSION_H
#define MINDSPORE_CCSRC_SESSION_SESSION_H
#include <vector>
#include <string>
#include <unordered_map>
#include <utility>
#include <memory>
#include <map>
#include "backend/session/session_basic.h"
#include "ir/anf.h"
#include "include/inference.h"
#ifdef ENABLE_D
#include "runtime/context.h"
#endif
namespace mindspore {
namespace inference {
class MSInferSession : public InferSession {
public:
MSInferSession();
~MSInferSession();
bool InitEnv(const std::string &device_type, uint32_t device_id) override;
bool FinalizeEnv() override;
bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
bool UnloadModel(uint32_t model_id) override;
bool ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override;
private:
std::shared_ptr<session::SessionBasic> session_impl_ = nullptr;
std::vector<uint32_t> graph_id_;
std::string device_type_;
int32_t device_id_;
#ifdef ENABLE_D
rtContext_t context_ = nullptr;
#endif
std::shared_ptr<FuncGraph> LoadModel(const char *model_buf, size_t size, const std::string &device);
std::shared_ptr<std::vector<char>> ReadFile(const std::string &file);
static void RegAllOp();
string AjustTargetName(const std::string &device);
bool CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr, uint32_t &model_id);
bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const;
std::vector<tensor::TensorPtr> RunGraph(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs);
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_CCSRC_SESSION_SESSION_H

@ -1,214 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include <algorithm>
#include "include/inference.h"
#include "backend/session/session.h"
#include "utils/load_onnx/anf_converter.h"
#include "backend/session/session_basic.h"
#include "backend/session/session_factory.h"
#include "utils/base_ref_utils.h"
#include "backend/kernel_compiler/oplib/oplib.h"
#ifdef ENABLE_D
#include "utils/context/ms_context.h"
#include "backend/session/ascend_session.h"
#else
#include "backend/session/cpu_session.h"
#endif
namespace py = pybind11;
namespace mindspore::inference {
std::shared_ptr<FuncGraph> LoadModel(const char *model_buf, size_t size, const std::string &device) {
try {
inference::Session::RegAllOp();
auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size);
return anf_graph;
} catch (std::exception &e) {
MS_LOG(ERROR) << "Inference LoadModel failed";
return nullptr;
}
}
void ExitInference() {
auto ms_context = MsContext::GetInstance();
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
return;
}
if (!ms_context->CloseTsd()) {
MS_LOG(ERROR) << "Inference CloseTsd failed!";
return;
}
}
std::shared_ptr<MSSession> MSSession::CreateSession(const std::string &device, uint32_t device_id) {
try {
auto session = std::make_shared<inference::Session>();
auto ret = session->Init(device, device_id);
if (ret != 0) {
return nullptr;
}
return session;
} catch (std::exception &e) {
MS_LOG(ERROR) << "Inference CreatSession failed";
return nullptr;
}
}
void Session::RegAllOp() {
static std::mutex init_mutex;
static bool Initialized = false;
std::lock_guard<std::mutex> lock(init_mutex);
if (Initialized) {
return;
}
Initialized = true;
MsContext::GetInstance()->set_execution_mode(kGraphMode);
Py_Initialize();
auto c_expression = PyImport_ImportModule("mindspore._c_expression");
if (c_expression == nullptr) {
MS_LOG(EXCEPTION) << "Failed to import mindspore._c_expression module.";
return;
}
PyObject *c_expression_dict = PyModule_GetDict(c_expression);
PyObject *op_info_loader_class = PyDict_GetItemString(c_expression_dict, "OpInfoLoaderPy");
if (op_info_loader_class == nullptr) {
MS_LOG(EXCEPTION) << "Failed to get op_info_loader_class from mindspore._c_expression.";
return;
}
PyObject *op_info_loader = PyInstanceMethod_New(op_info_loader_class);
if (op_info_loader == nullptr) {
MS_LOG(EXCEPTION) << "Failed to create op_info_loader instance.";
return;
}
PyObject *op_info_loader_ins = PyObject_CallObject(op_info_loader, nullptr);
if (op_info_loader_ins == nullptr) {
MS_LOG(EXCEPTION) << "Failed to call op_info_loader instance.";
return;
}
auto all_ops_info_vector_addr_ul = PyObject_CallMethod(op_info_loader_ins, "get_all_ops_info", nullptr);
if (all_ops_info_vector_addr_ul == nullptr) {
MS_LOG(EXCEPTION) << "Failed to call get_all_ops_addr.";
return;
}
auto all_ops_info_vector_addr = PyLong_AsVoidPtr(all_ops_info_vector_addr_ul);
auto all_ops_info = static_cast<std::vector<kernel::OpInfo *> *>(all_ops_info_vector_addr);
for (auto op_info : *all_ops_info) {
kernel::OpLib::RegOpInfo(std::shared_ptr<kernel::OpInfo>(op_info));
}
all_ops_info->clear();
delete all_ops_info;
Py_DECREF(op_info_loader);
Py_DECREF(op_info_loader_class);
Py_DECREF(c_expression_dict);
Py_DECREF(c_expression);
return;
}
uint32_t Session::CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr) {
MS_ASSERT(session_impl_ != nullptr);
try {
auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr));
py::gil_scoped_release gil_release;
return graph_id;
} catch (std::exception &e) {
MS_LOG(ERROR) << "Inference CompileGraph failed";
return static_cast<uint32_t>(-1);
}
}
MultiTensor Session::RunGraph(uint32_t graph_id, const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) {
try {
std::vector<tensor::TensorPtr> inTensors;
inTensors.resize(inputs.size());
bool has_error = false;
std::transform(inputs.begin(), inputs.end(), inTensors.begin(),
[&has_error](const std::shared_ptr<inference::MSTensor> &tensor_ptr) -> tensor::TensorPtr {
if (tensor_ptr == nullptr) {
MS_LOG(WARNING) << "input MSTensor is nullptr, return nullptr";
has_error = true;
return nullptr;
}
auto tensor = static_cast<inference::Tensor *>(tensor_ptr.get());
if (tensor == nullptr) {
MS_LOG(ERROR) << "Can not cast input MSTensor to tensor";
has_error = true;
return nullptr;
}
return tensor->tensor();
});
if (has_error) {
MS_LOG(ERROR) << "Init Tensor failed, returning empty result";
std::vector<std::shared_ptr<inference::MSTensor>> multiTensor;
return multiTensor;
}
VectorRef outputs;
session_impl_->RunGraph(graph_id, inTensors, &outputs);
return TransformVectorRefToMultiTensor(outputs);
} catch (std::exception &e) {
MS_LOG(ERROR) << "Inference Rungraph failed";
return MultiTensor();
}
}
namespace {
string AjustTargetName(const std::string &device) {
if (device == kAscendDevice) {
return std::string(kAscendDevice) + "Inference";
} else {
MS_LOG(ERROR) << "Only support device Ascend right now";
return "";
}
}
} // namespace
int Session::Init(const std::string &device, uint32_t device_id) {
RegAllOp();
auto ms_context = MsContext::GetInstance();
ms_context->set_execution_mode(kGraphMode);
ms_context->set_device_id(device_id);
auto ajust_device = AjustTargetName(device);
if (ajust_device == "") {
return -1;
}
ms_context->set_device_target(device);
session_impl_ = session::SessionFactory::Get().Create(ajust_device);
if (session_impl_ == nullptr) {
MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available.";
return -1;
}
session_impl_->Init(device_id);
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
return -1;
}
if (!ms_context->OpenTsd()) {
MS_LOG(ERROR) << "Session init OpenTsd failed!";
return -1;
}
return 0;
}
bool Session::CheckModelInputs(uint32_t graph_id,
const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) const {
MS_ASSERT(session_impl_ != nullptr);
return session_impl_->CheckModelInputs(graph_id, inputs);
}
Session::Session() = default;
} // namespace mindspore::inference

@ -276,7 +276,7 @@ bool ExistSummaryNode(const KernelGraph *graph) {
GraphId SessionBasic::graph_sum_ = 0;
KernelGraphPtr SessionBasic::GetGraph(mindspore::GraphId graph_id) {
KernelGraphPtr SessionBasic::GetGraph(mindspore::GraphId graph_id) const {
auto it = graphs_.find(graph_id);
if (it == graphs_.end()) {
MS_LOG(WARNING) << "Can't find graph " << graph_id;

@ -106,9 +106,7 @@ class SessionBasic {
virtual void GetSummaryNodes(KernelGraph *graph);
void AssignParamKey(const KernelGraphPtr &kernel_graph);
void InitPSParamAndOptim(const KernelGraphPtr &kernel_graph, const std::vector<tensor::TensorPtr> &inputs_const);
virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) {
return true;
}
virtual bool CheckModelInputs(uint32_t graph_id, const std::vector<tensor::TensorPtr> &inputs) const { return true; }
#ifdef ENABLE_DEBUGGER
// set debugger
@ -120,7 +118,7 @@ class SessionBasic {
protected:
// Get graph by graph id ,if not exist return null ptr
KernelGraphPtr GetGraph(GraphId graph_id);
KernelGraphPtr GetGraph(GraphId graph_id) const;
virtual void LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
const std::vector<tensor::TensorPtr> &inputs_const) const;
void UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_graph, VectorRef *const outputs,

@ -17,17 +17,17 @@
#include <vector>
#include <memory>
#include "utils/base_ref_utils.h"
#include "include/ms_tensor.h"
#include "include/infer_tensor.h"
#include "ir/tensor.h"
namespace mindspore {
void IterateFindTensor(std::vector<std::shared_ptr<inference::MSTensor>> *msTensors, const VectorRef &ref_list) {
void IterateFindTensor(std::vector<tensor::TensorPtr> *msTensors, const VectorRef &ref_list) {
for (size_t i = 0; i < ref_list.size(); ++i) {
if (utils::isa<tensor::TensorPtr>(ref_list[i])) {
auto tensor_ptr = utils::cast<std::shared_ptr<tensor::Tensor>>(ref_list[i]);
MS_EXCEPTION_IF_NULL(tensor_ptr);
auto tensor = new inference::Tensor(tensor_ptr);
msTensors->emplace_back(std::shared_ptr<inference::MSTensor>(tensor));
msTensors->emplace_back(tensor_ptr);
} else if (utils::isa<VectorRef>(ref_list[i])) {
auto ref_iter = utils::cast<VectorRef>(ref_list[i]);
IterateFindTensor(msTensors, ref_iter);
@ -37,19 +37,19 @@ void IterateFindTensor(std::vector<std::shared_ptr<inference::MSTensor>> *msTens
}
}
std::vector<std::shared_ptr<inference::MSTensor>> TransformVectorRefToMultiTensor(const VectorRef &base_ref) {
std::vector<std::shared_ptr<inference::MSTensor>> msTensors;
std::vector<tensor::TensorPtr> TransformVectorRefToMultiTensor(const VectorRef &base_ref) {
std::vector<tensor::TensorPtr> msTensors;
if (utils::isa<VectorRef>(base_ref)) {
auto ref_list = utils::cast<VectorRef>(base_ref);
IterateFindTensor(&msTensors, ref_list);
} else if (utils::isa<tensor::Tensor>(base_ref)) {
auto tensor_ptr = utils::cast<std::shared_ptr<tensor::Tensor>>(base_ref);
MS_EXCEPTION_IF_NULL(tensor_ptr);
auto tensor = new inference::Tensor(tensor_ptr);
msTensors.emplace_back(std::shared_ptr<inference::MSTensor>(tensor));
msTensors.emplace_back(tensor_ptr);
} else {
MS_LOG(EXCEPTION) << "The output is not a base ref list or a tensor!";
}
return msTensors;
}
} // namespace mindspore

@ -17,11 +17,12 @@
#include <vector>
#include <memory>
#include "utils/base_ref.h"
#include "include/ms_tensor.h"
#include "include/infer_tensor.h"
#include "ir/tensor.h"
#ifndef MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H
#define MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H
namespace mindspore {
std::vector<std::shared_ptr<inference::MSTensor>> TransformVectorRefToMultiTensor(const VectorRef &base_ref);
std::vector<tensor::TensorPtr> TransformVectorRefToMultiTensor(const VectorRef &base_ref);
} // namespace mindspore
#endif // MINDSPORE_CCSRC_UTILS_BASE_REF_UTILS_H

@ -85,68 +85,4 @@ bool Tensor::operator==(const Value &other) const {
}
}
} // namespace tensor
namespace inference {
MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector<int> &shape) {
return new Tensor(data_type, shape);
}
Tensor::Tensor() { this->tensor_impl_ = std::make_shared<tensor::Tensor>(); }
Tensor::Tensor(TypeId data_type, const std::vector<int> &shape) {
this->tensor_impl_ = std::make_shared<tensor::Tensor>(data_type, shape);
}
Tensor::Tensor(std::shared_ptr<tensor::Tensor> tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); }
TypeId Tensor::data_type() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->data_type();
}
TypeId Tensor::set_data_type(TypeId data_type) {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->set_data_type(data_type);
}
std::vector<int> Tensor::shape() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->shape();
}
size_t Tensor::set_shape(const std::vector<int> &shape) {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->set_shape(shape);
}
int Tensor::DimensionSize(size_t index) const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->DimensionSize(index);
}
int Tensor::ElementsNum() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->ElementsNum();
}
std::size_t Tensor::hash() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->hash();
}
std::shared_ptr<tensor::Tensor> Tensor::tensor() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_;
}
size_t Tensor::Size() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->Size();
}
void *Tensor::MutableData() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->data();
}
} // namespace inference
} // namespace mindspore

@ -56,42 +56,6 @@ class Tensor : public MetaTensor {
using TensorPtr = std::shared_ptr<Tensor>;
} // namespace tensor
namespace inference {
class Tensor : public MSTensor {
public:
Tensor();
Tensor(TypeId data_type, const std::vector<int> &shape);
explicit Tensor(std::shared_ptr<tensor::Tensor> tensor_ptr);
~Tensor() = default;
TypeId data_type() const override;
TypeId set_data_type(const TypeId data_type) override;
std::vector<int> shape() const override;
size_t set_shape(const std::vector<int> &shape) override;
int DimensionSize(size_t index) const override;
int ElementsNum() const override;
std::size_t hash() const override;
std::shared_ptr<tensor::Tensor> tensor() const;
size_t Size() const override;
void *MutableData() const override;
protected:
std::shared_ptr<tensor::Tensor> tensor_impl_;
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_CORE_IR_LITE_TENSOR_H_

@ -454,67 +454,4 @@ TypeId Tensor::set_data_type(const TypeId data_type) {
return data_type;
}
} // namespace tensor
namespace inference {
MSTensor *MSTensor::CreateTensor(TypeId data_type, const std::vector<int> &shape) {
return new Tensor(data_type, shape);
}
Tensor::Tensor(TypeId data_type, const std::vector<int> &shape) {
this->tensor_impl_ = std::make_shared<tensor::Tensor>(data_type, shape);
}
Tensor::Tensor(std::shared_ptr<tensor::Tensor> tensor_ptr) { this->tensor_impl_ = std::move(tensor_ptr); }
TypeId Tensor::data_type() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->data_type();
}
TypeId Tensor::set_data_type(TypeId data_type) {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->set_data_type(data_type);
}
std::vector<int> Tensor::shape() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->shape();
}
size_t Tensor::set_shape(const std::vector<int> &shape) {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->set_shape(shape);
}
int Tensor::DimensionSize(size_t index) const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->DimensionSize(index);
}
int Tensor::ElementsNum() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->ElementsNum();
}
std::size_t Tensor::hash() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->hash();
}
std::shared_ptr<tensor::Tensor> Tensor::tensor() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_;
}
size_t Tensor::Size() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->data().nbytes();
}
void *Tensor::MutableData() const {
MS_ASSERT(this->tensor_impl_ != nullptr);
return this->tensor_impl_->data_c();
}
} // namespace inference
} // namespace mindspore

@ -25,7 +25,6 @@
#include "Eigen/Core"
#include "ir/device_sync.h"
#include "ir/meta_tensor.h"
#include "include/ms_tensor.h"
#include "utils/log_adapter.h"
using float16 = Eigen::half;
@ -237,40 +236,6 @@ class Tensor : public MetaTensor {
using TensorPtr = std::shared_ptr<Tensor>;
using TensorPtrList = std::vector<std::shared_ptr<Tensor>>;
} // namespace tensor
namespace inference {
class Tensor : public MSTensor {
public:
Tensor(TypeId data_type, const std::vector<int> &shape);
explicit Tensor(std::shared_ptr<tensor::Tensor> tensor_ptr);
~Tensor() = default;
TypeId data_type() const override;
TypeId set_data_type(const TypeId data_type) override;
std::vector<int> shape() const override;
size_t set_shape(const std::vector<int> &shape) override;
int DimensionSize(size_t index) const override;
int ElementsNum() const override;
std::size_t hash() const override;
std::shared_ptr<tensor::Tensor> tensor() const;
size_t Size() const override;
void *MutableData() const override;
protected:
std::shared_ptr<tensor::Tensor> tensor_impl_;
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_CORE_IR_TENSOR_H_

@ -13,19 +13,19 @@ add_library(protobuf::libprotobuf ALIAS protobuf::protobuf)
add_executable(protobuf::libprotoc ALIAS protobuf::protoc)
set(_PROTOBUF_LIBPROTOBUF protobuf::libprotobuf)
if(CMAKE_CROSSCOMPILING)
if (CMAKE_CROSSCOMPILING)
find_program(_PROTOBUF_PROTOC protoc)
else()
else ()
set(_PROTOBUF_PROTOC $<TARGET_FILE:protobuf::protoc>)
endif()
endif ()
# Find gRPC installation
# Looks for gRPCConfig.cmake file installed by gRPC's cmake installation.
if (EXISTS ${grpc_ROOT}/lib64)
set(gRPC_DIR "${grpc_ROOT}/lib64/cmake/grpc")
else()
else ()
set(gRPC_DIR "${grpc_ROOT}/lib/cmake/grpc")
endif()
endif ()
message("serving using grpc_DIR : " ${gPRC_DIR})
find_package(gRPC CONFIG REQUIRED)
@ -34,11 +34,11 @@ message(STATUS "Using gRPC ${gRPC_VERSION}")
set(_GRPC_GRPCPP gRPC::grpc++)
set(_REFLECTION gRPC::grpc++_reflection)
if(CMAKE_CROSSCOMPILING)
if (CMAKE_CROSSCOMPILING)
find_program(_GRPC_CPP_PLUGIN_EXECUTABLE grpc_cpp_plugin)
else()
else ()
set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:gRPC::grpc_cpp_plugin>)
endif()
endif ()
# Proto file
get_filename_component(hw_proto "ms_service.proto" ABSOLUTE)
@ -67,11 +67,36 @@ file(GLOB_RECURSE CORE_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
list(APPEND SERVING_SRC "main.cc" ${hw_proto_srcs} ${hw_grpc_srcs} ${CORE_SRC_LIST})
option(ENABLE_ACL "enable acl" OFF)
if (ENABLE_ACL)
if (DEFINED ENV{ASCEND_CUSTOM_PATH})
set(ASCEND_PATH $ENV{ASCEND_CUSTOM_PATH})
else ()
set(ASCEND_PATH /usr/local/Ascend)
endif ()
set(ACL_LIB_DIR ${ASCEND_PATH}/acllib/)
MESSAGE("acl lib dir " ${ACL_LIB_DIR})
include_directories(${ACL_LIB_DIR}/include/)
file(GLOB_RECURSE ACL_SESSION_SRC_LIST RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "acl/*.cc")
list(APPEND SERVING_SRC ${ACL_SESSION_SRC_LIST})
endif ()
include_directories(${CMAKE_BINARY_DIR})
add_executable(ms_serving ${SERVING_SRC})
target_link_libraries(ms_serving inference mindspore_gvar)
target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread)
if (ENABLE_D)
add_compile_definitions(ENABLE_D)
target_link_libraries(ms_serving ${RUNTIME_LIB})
endif()
endif ()
if (ENABLE_ACL)
add_compile_definitions(ENABLE_ACL)
set(ALC_LIB_SO ${ACL_LIB_DIR}/lib64/libruntime.so ${ACL_LIB_DIR}/lib64/libascendcl.so
${ACL_LIB_DIR}/lib64/libacl_retr.so ${ACL_LIB_DIR}/lib64/libacl_cblas.so)
target_link_libraries(ms_serving ${ALC_LIB_SO})
else ()
target_link_libraries(ms_serving inference mindspore_gvar)
endif ()

@ -0,0 +1,136 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include <algorithm>
#include "serving/acl/acl_session.h"
#include "include/infer_log.h"
namespace mindspore::inference {
std::shared_ptr<InferSession> InferSession::CreateSession(const std::string &device, uint32_t device_id) {
try {
auto session = std::make_shared<AclSession>();
auto ret = session->InitEnv(device, device_id);
if (!ret) {
return nullptr;
}
return session;
} catch (std::exception &e) {
MSI_LOG_ERROR << "Inference CreatSession failed";
return nullptr;
}
}
bool AclSession::LoadModelFromFile(const std::string &file_name, uint32_t &model_id) {
return model_process_.LoadModelFromFile(file_name, model_id);
}
bool AclSession::UnloadModel(uint32_t model_id) {
model_process_.UnLoad();
return true;
}
bool AclSession::ExecuteModel(uint32_t model_id, const RequestBase &request,
ReplyBase &reply) { // set d context
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "set the ascend device context failed";
return false;
}
return model_process_.Execute(request, reply);
}
bool AclSession::InitEnv(const std::string &device_type, uint32_t device_id) {
device_type_ = device_type;
device_id_ = device_id;
auto ret = aclInit(nullptr);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "Execute aclInit Failed";
return false;
}
MSI_LOG_INFO << "acl init success";
ret = aclrtSetDevice(device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl open device " << device_id_ << " failed";
return false;
}
MSI_LOG_INFO << "open device " << device_id_ << " success";
ret = aclrtCreateContext(&context_, device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl create context failed";
return false;
}
MSI_LOG_INFO << "create context success";
ret = aclrtCreateStream(&stream_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl create stream failed";
return false;
}
MSI_LOG_INFO << "create stream success";
aclrtRunMode run_mode;
ret = aclrtGetRunMode(&run_mode);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "acl get run mode failed";
return false;
}
bool is_device = (run_mode == ACL_DEVICE);
model_process_.SetIsDevice(is_device);
MSI_LOG_INFO << "get run mode success is device input/output " << is_device;
MSI_LOG_INFO << "Init acl success, device id " << device_id_;
return true;
}
bool AclSession::FinalizeEnv() {
aclError ret;
if (stream_ != nullptr) {
ret = aclrtDestroyStream(stream_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "destroy stream failed";
}
stream_ = nullptr;
}
MSI_LOG_INFO << "end to destroy stream";
if (context_ != nullptr) {
ret = aclrtDestroyContext(context_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "destroy context failed";
}
context_ = nullptr;
}
MSI_LOG_INFO << "end to destroy context";
ret = aclrtResetDevice(device_id_);
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "reset devie " << device_id_ << " failed";
}
MSI_LOG_INFO << "end to reset device " << device_id_;
ret = aclFinalize();
if (ret != ACL_ERROR_NONE) {
MSI_LOG_ERROR << "finalize acl failed";
}
MSI_LOG_INFO << "end to finalize acl";
return true;
}
AclSession::AclSession() = default;
} // namespace mindspore::inference

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_H
#define MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_H
#ifndef MINDSPORE_SERVING_ACL_SESSION_H
#define MINDSPORE_SERVING_ACL_SESSION_H
#include <vector>
#include <string>
@ -23,31 +23,28 @@
#include <memory>
#include <map>
#include "backend/session/session_basic.h"
#include "ir/anf.h"
#include "include/inference.h"
#include "serving/acl/model_process.h"
namespace mindspore {
namespace inference {
class Session : public MSSession {
class AclSession : public InferSession {
public:
Session();
AclSession();
uint32_t CompileGraph(std::shared_ptr<FuncGraph> funcGraphPtr) override;
MultiTensor RunGraph(uint32_t graph_id, const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) override;
bool CheckModelInputs(uint32_t graph_id,
const std::vector<std::shared_ptr<inference::MSTensor>> &inputs) const override;
int Init(const std::string &device, uint32_t device_id);
static void RegAllOp();
bool InitEnv(const std::string &device_type, uint32_t device_id) override;
bool FinalizeEnv() override;
bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
bool UnloadModel(uint32_t model_id) override;
bool ExecuteModel(uint32_t model_id, const RequestBase &request, ReplyBase &reply) override;
private:
std::shared_ptr<session::SessionBasic> session_impl_ = nullptr;
std::vector<uint32_t> graph_id_;
std::string device_type_;
int32_t device_id_;
aclrtStream stream_ = nullptr;
aclrtContext context_ = nullptr;
ModelProcess model_process_;
};
} // namespace inference
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_SESSION_SESSION_BASIC_H
#endif // MINDSPORE_SERVING_ACL_SESSION_H

File diff suppressed because it is too large.

@ -0,0 +1,74 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INC_MODEL_PROCESS_ACL
#define INC_MODEL_PROCESS_ACL
#include <vector>
#include <string>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "serving/core/util/status.h"
#include "include/inference.h"
namespace mindspore {
namespace inference {
struct AclTensorInfo {
void *device_data;
size_t buffer_size;
aclDataType data_type;
std::vector<int64_t> dims;
};
class ModelProcess {
public:
ModelProcess() {}
~ModelProcess() {}
bool LoadModelFromFile(const std::string &file_name, uint32_t &model_id);
void UnLoad();
// override this method to avoid request/reply data copy
bool Execute(const RequestBase &request, ReplyBase &reply);
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
private:
uint32_t model_id_ = 0xffffffff;
bool is_run_on_device_ = false;
aclmdlDesc *model_desc_ = nullptr;
aclmdlDataset *inputs_ = nullptr;
aclmdlDataset *outputs_ = nullptr;
std::vector<AclTensorInfo> input_infos_;
std::vector<AclTensorInfo> output_infos_;
bool CreateDataBuffer(void *&data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
bool CheckAndInitInput(const RequestBase &request);
bool BuildOutputs(ReplyBase &reply);
bool InitInputsBuffer();
bool InitOutputsBuffer();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
void DestroyOutputsBuffer();
};
} // namespace inference
} // namespace mindspore
#endif
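model_process.cc is suppressed in this view, so the following is only an assumed driving pattern, mirroring how AclSession above uses the class; the om_file argument is a placeholder:
#include <string>
#include "serving/acl/model_process.h"

namespace mindspore::inference {
// Illustration only: load, execute once, unload. Error handling is trimmed.
bool RunWithModelProcess(const std::string &om_file, const RequestBase &request, ReplyBase &reply) {
  ModelProcess model_process;
  model_process.SetIsDevice(false);  // host-side buffers, as AclSession derives from aclrtGetRunMode
  uint32_t model_id = 0;
  if (!model_process.LoadModelFromFile(om_file, model_id)) {
    return false;
  }
  bool ok = model_process.Execute(request, reply);
  model_process.UnLoad();
  return ok;
}
}  // namespace mindspore::inference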

Some files were not shown because too many files have changed in this diff.
