introducing new C++ API

pull/2773/head
ervinzhang 5 years ago
parent 2f565f4c20
commit bd5a777f81

@ -17,6 +17,10 @@ else()
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
endif()
if (ENABLE_PYTHON)
add_compile_definitions(ENABLE_PYTHON)
endif()
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -g2 -ggdb -fno-inline-functions -fno-omit-frame-pointer -Wl,--allow-shlib-undefined -D_LIBCPP_INLINE_VISIBILITY='' -D'_LIBCPP_EXTERN_TEMPLATE(...)=' -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2 -Wno-cpp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror -Wall -Wno-deprecated-declarations -fPIC")
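The new ENABLE_PYTHON compile definition above is the switch the C++ sources later in this diff key off. As a minimal sketch (illustrative only, not part of the commit), the guard pattern looks like this:

#ifdef ENABLE_PYTHON
#include "pybind11/pybind11.h"  // pybind11-only headers stay behind the guard
#endif

void Example() {  // hypothetical function for illustration
#ifdef ENABLE_PYTHON
  // Python-aware build: pybind11 symbols may be used here.
#else
  // Pure C++ build: no Python interpreter or pybind11 dependency is linked in.
#endif
}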

@ -25,7 +25,7 @@ usage()
echo "Usage:"
echo "bash build.sh [-d] [-r] [-v] [-c on|off] [-t on|off] [-g on|off] [-h] [-b ge] [-m infer|train] \\"
echo " [-a on|off] [-Q on|off] [-p on|off] [-i] [-L] [-R] [-D on|off] [-j[n]] [-e gpu|d|cpu] \\"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E]"
echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 9.2|10.1] [-I] [-K] [-B on|off] [-E] [-l on|off]"
echo ""
echo "Options:"
echo " -d Debug mode"
@ -56,6 +56,7 @@ usage()
echo " -s Enable serving module, default off"
echo " -B Enable debugger, default off"
echo " -E Enable IBVERBS for parameter server, default off"
echo " -l Compile with python dependency, default on"
}
# check value of input is 'on' or 'off'
@ -98,9 +99,10 @@ checkopts()
ENABLE_SERVING="off"
ENABLE_DEBUGGER="off"
ENABLE_IBVERBS="off"
ENABLE_PYTHON="on"
# Process the options
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:I:LRP:Q:D:zM:V:K:sB:E' opt
while getopts 'drvj:c:t:hsb:a:g:p:ie:m:l:I:LRP:Q:D:zM:V:K:sB:E' opt
do
OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
case "${opt}" in
@ -151,6 +153,10 @@ checkopts()
check_on_off $OPTARG p
ENABLE_PROFILE="$OPTARG"
;;
l)
check_on_off $OPTARG l
ENABLE_PYTHON="$OPTARG"
;;
i)
INC_BUILD="on"
;;
@ -316,6 +322,7 @@ build_mindspore()
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_E2E=ON"
fi
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_DUMP_IR=${ENABLE_DUMP_IR}"
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_PYTHON=${ENABLE_PYTHON}"
if [[ "X$ENABLE_MPI" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_MPI=ON"
fi

@ -19,6 +19,7 @@ option(ENABLE_MPI "enable mpi" OFF)
option(ENABLE_AKG "enable akg" OFF)
option(ENABLE_DEBUGGER "enable debugger" OFF)
option(ENABLE_IBVERBS "enable IBVERBS for parameter server" OFF)
option(ENABLE_PYTHON "Enable python" ON)
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
if (WIN32)

@ -39,6 +39,7 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/device/ascend/platform)
include_directories(${CMAKE_BINARY_DIR}) # for protobuf generated .h
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/mindrecord/include)
include_directories(${CMAKE_SOURCE_DIR}/mindspore/ccsrc/dataset/include)
######################################################################
####################### Flags ########################################
@ -67,7 +68,10 @@ add_dependencies(engine-gnn core)
add_dependencies(engine core)
add_dependencies(text core)
add_dependencies(text-kernels core)
add_dependencies(APItoPython core)
add_dependencies(cpp-API core)
if (ENABLE_PYTHON)
add_dependencies(APItoPython core)
endif()
if (ENABLE_TDTQUE)
add_dependencies(engine-tdt core)
endif ()
@ -78,7 +82,7 @@ set(submodules
$<TARGET_OBJECTS:kernels>
$<TARGET_OBJECTS:kernels-image>
$<TARGET_OBJECTS:kernels-data>
$<TARGET_OBJECTS:APItoPython>
$<TARGET_OBJECTS:cpp-API>
$<TARGET_OBJECTS:engine-datasetops-source>
$<TARGET_OBJECTS:engine-datasetops-source-sampler>
$<TARGET_OBJECTS:engine-gnn>
@ -90,6 +94,12 @@ set(submodules
$<TARGET_OBJECTS:text-kernels>
)
if (ENABLE_PYTHON)
set(submodules
${submodules}
$<TARGET_OBJECTS:APItoPython>)
endif()
if (ENABLE_TDTQUE)
add_library(_c_dataengine SHARED ${submodules} $<TARGET_OBJECTS:engine-tdt>)
else ()

@ -1,7 +1,16 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(APItoPython OBJECT
de_pipeline.cc
python_bindings.cc
if (ENABLE_PYTHON)
add_library(APItoPython OBJECT
de_pipeline.cc
python_bindings.cc
)
target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS})
endif()
add_library(cpp-API OBJECT
datasets.cc
iterator.cc
transforms.cc
samplers.cc
)
target_include_directories(APItoPython PRIVATE ${pybind11_INCLUDE_DIRS})

File diff suppressed because it is too large

@ -0,0 +1,101 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/iterator.h"
#include "dataset/core/client.h"
#include "dataset/include/datasets.h"
namespace mindspore {
namespace dataset {
namespace api {
// Get the next row from the data pipeline.
void Iterator::GetNextRow(TensorMap *row) {
Status rc = iterator_->GetNextAsMap(row);
if (rc.IsError()) {
MS_LOG(ERROR) << "GetNextRow: Failed to get next row.";
row->clear();
}
}
// Shut down the data pipeline.
void Iterator::Stop() {
// Release the iterator_ unique_ptr. This triggers the destructor of iterator_.
iterator_.reset();
// Release ownership of tree_ shared pointer. This will decrement the ref count.
tree_.reset();
}
// Function to build and launch the execution tree.
Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds) {
// One time init
Status rc;
rc = GlobalInit();
RETURN_IF_NOT_OK(rc);
// Instantiate the execution tree
tree_ = std::make_shared<ExecutionTree>();
// Iterative BFS converting Dataset tree into runtime Execution tree.
std::queue<std::pair<std::shared_ptr<Dataset>, std::shared_ptr<DatasetOp>>> q;
if (ds != nullptr) {
// Convert the current root node.
auto root_op = ds->Build()->front();
RETURN_UNEXPECTED_IF_NULL(root_op);
RETURN_IF_NOT_OK(tree_->AssociateNode(root_op));
q.push(std::make_pair(ds, root_op));
// Traverse down to the children and convert them to the corresponding DatasetOps (i.e. execution tree nodes)
while (!q.empty()) {
auto node_pair = q.front();
q.pop();
// Iterate through all the direct children of the first element in our BFS queue
for (auto child : node_pair.first->children) {
auto child_ops = child->Build();
RETURN_UNEXPECTED_IF_NULL(child_ops);
auto node_op = node_pair.second;
// Iterate through all the DatasetOps returned by calling Build on the last Dataset object, associate them
// with the execution tree and add the child and parent relationship between the nodes
// Note that some Dataset objects might return more than one DatasetOps
// e.g. MapDataset will return MapOp and ProjectOp if project_columns is set for MapDataset
for (auto child_op : *child_ops) {
RETURN_IF_NOT_OK(tree_->AssociateNode(child_op));
RETURN_IF_NOT_OK(node_op->AddChild(child_op));
node_op = child_op;
}
// Add the child and the last element of the returned DatasetOps (which is now the leaf node in our current
// execution tree) to the BFS queue
q.push(std::make_pair(child, child_ops->back()));
}
}
RETURN_IF_NOT_OK(tree_->AssignRoot(root_op));
}
// Launch the execution tree.
RETURN_IF_NOT_OK(tree_->Prepare());
RETURN_IF_NOT_OK(tree_->Launch());
iterator_ = std::make_unique<DatasetIterator>(tree_);
RETURN_UNEXPECTED_IF_NULL(iterator_);
return rc;
}
} // namespace api
} // namespace dataset
} // namespace mindspore
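Taken together, GetNextRow() and Stop() give callers a simple pull loop. Below is a hedged sketch of a consumer, not part of this commit: `iter` is assumed to be a std::shared_ptr<Iterator> already bound to a Dataset (the Dataset factory functions live in the datasets.cc diff, which is suppressed below), TensorMap is assumed to map column names to std::shared_ptr<Tensor>, and an empty row is assumed to signal end of data:

TensorMap row;
iter->GetNextRow(&row);
while (!row.empty()) {        // assumption: an empty row marks the end of the pipeline
  auto image = row["image"];  // "image" is a hypothetical column name
  // ... consume the tensor ...
  iter->GetNextRow(&row);
}
iter->Stop();                 // releases the DatasetIterator, then the execution tree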

@ -297,7 +297,7 @@ void bindTensor(py::module *m) {
}))
.def_buffer([](Tensor &tensor) {
py::buffer_info info;
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
return info;
})
.def("__str__", &Tensor::ToString)
@ -311,7 +311,7 @@ void bindTensor(py::module *m) {
return res;
}
py::buffer_info info;
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
THROW_IF_ERROR(Tensor::GetBufferInfo(&tensor, &info));
return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
});

@ -0,0 +1,224 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dataset/include/samplers.h"
#include "dataset/engine/datasetops/source/sampler/sampler.h"
#include "dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
#include "dataset/engine/datasetops/source/sampler/pk_sampler.h"
namespace mindspore {
namespace dataset {
namespace api {
SamplerObj::SamplerObj() {}
/// Function to create a Distributed Sampler.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle,
int64_t num_samples, uint32_t seed) {
auto sampler = std::make_shared<DistributedSamplerObj>(num_shards, shard_id, shuffle, num_samples, seed);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/// Function to create a PK Sampler.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle, int64_t num_samples) {
auto sampler = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/// Function to create a Random Sampler.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement, int64_t num_samples) {
auto sampler = std::make_shared<RandomSamplerObj>(replacement, num_samples);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/// Function to create a Sequential Sampler.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index, int64_t num_samples) {
auto sampler = std::make_shared<SequentialSamplerObj>(start_index, num_samples);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/// Function to create a Subset Random Sampler.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(const std::vector<int64_t> &indices, int64_t num_samples) {
auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/// Function to create a Weighted Random Sampler.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(const std::vector<double> &weights, int64_t num_samples,
bool replacement) {
auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement);
// Input validation
if (!sampler->ValidateParams()) {
return nullptr;
}
return sampler;
}
/* ####################################### Derived Sampler classes ################################# */
// DistributedSampler
DistributedSamplerObj::DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
uint32_t seed)
: num_shards_(num_shards), shard_id_(shard_id), shuffle_(shuffle), num_samples_(num_samples), seed_(seed) {}
bool DistributedSamplerObj::ValidateParams() {
if (num_shards_ <= 0) {
MS_LOG(ERROR) << "DistributedSampler: invalid num_shards: " << num_shards_;
return false;
}
if (shard_id_ < 0 || shard_id_ >= num_shards_) {
MS_LOG(ERROR) << "DistributedSampler: invalid input, shard_id: " << shard_id_ << ", num_shards: " << num_shards_;
return false;
}
if (num_samples_ < 0) {
MS_LOG(ERROR) << "DistributedSampler: invalid num_samples: " << num_samples_;
return false;
}
return true;
}
std::shared_ptr<Sampler> DistributedSamplerObj::Build() {
return std::make_shared<dataset::DistributedSampler>(num_samples_, num_shards_, shard_id_, shuffle_, seed_);
}
// PKSampler
PKSamplerObj::PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples)
: num_val_(num_val), shuffle_(shuffle), num_samples_(num_samples) {}
bool PKSamplerObj::ValidateParams() {
if (num_val_ <= 0) {
MS_LOG(ERROR) << "PKSampler: invalid num_val: " << num_val_;
return false;
}
if (num_samples_ < 0) {
MS_LOG(ERROR) << "PKSampler: invalid num_samples: " << num_samples_;
return false;
}
return true;
}
std::shared_ptr<Sampler> PKSamplerObj::Build() {
return std::make_shared<dataset::PKSampler>(num_samples_, num_val_, shuffle_);
}
// RandomSampler
RandomSamplerObj::RandomSamplerObj(bool replacement, int64_t num_samples)
: replacement_(replacement), num_samples_(num_samples) {}
bool RandomSamplerObj::ValidateParams() {
if (num_samples_ < 0) {
MS_LOG(ERROR) << "RandomSampler: invalid num_samples: " << num_samples_;
return false;
}
return true;
}
std::shared_ptr<Sampler> RandomSamplerObj::Build() {
bool reshuffle_each_epoch = true;
auto sampler = std::make_shared<dataset::RandomSampler>(num_samples_, replacement_, reshuffle_each_epoch);
return sampler;
}
// SequentialSampler
SequentialSamplerObj::SequentialSamplerObj(int64_t start_index, int64_t num_samples)
: start_index_(start_index), num_samples_(num_samples) {}
bool SequentialSamplerObj::ValidateParams() {
if (num_samples_ < 0) {
MS_LOG(ERROR) << "SequentialSampler: invalid num_samples: " << num_samples_;
return false;
}
if (start_index_ < 0) {
MS_LOG(ERROR) << "SequentialSampler: invalid start_index: " << start_index_;
return false;
}
return true;
}
std::shared_ptr<Sampler> SequentialSamplerObj::Build() {
auto sampler = std::make_shared<dataset::SequentialSampler>(num_samples_, start_index_);
return sampler;
}
// SubsetRandomSampler
SubsetRandomSamplerObj::SubsetRandomSamplerObj(const std::vector<int64_t> &indices, int64_t num_samples)
: indices_(indices), num_samples_(num_samples) {}
bool SubsetRandomSamplerObj::ValidateParams() {
if (num_samples_ < 0) {
MS_LOG(ERROR) << "SubsetRandomSampler: invalid num_samples: " << num_samples_;
return false;
}
return true;
}
std::shared_ptr<Sampler> SubsetRandomSamplerObj::Build() {
auto sampler = std::make_shared<dataset::SubsetRandomSampler>(num_samples_, indices_);
return sampler;
}
// WeightedRandomSampler
WeightedRandomSamplerObj::WeightedRandomSamplerObj(const std::vector<double> &weights, int64_t num_samples,
bool replacement)
: weights_(weights), num_samples_(num_samples), replacement_(replacement) {}
bool WeightedRandomSamplerObj::ValidateParams() {
if (num_samples_ < 0) {
MS_LOG(ERROR) << "WeightedRandomSampler: invalid num_samples: " << num_samples_;
return false;
}
return true;
}
std::shared_ptr<Sampler> WeightedRandomSamplerObj::Build() {
auto sampler = std::make_shared<dataset::WeightedRandomSampler>(num_samples_, weights_, replacement_);
return sampler;
}
} // namespace api
} // namespace dataset
} // namespace mindspore
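Every factory above funnels through ValidateParams() and returns nullptr on bad input, so a broken configuration surfaces at construction time rather than at pipeline launch. A small usage sketch (argument values are illustrative; any default arguments live in samplers.h, which this diff does not show):

auto sampler = api::DistributedSampler(/*num_shards=*/4, /*shard_id=*/1,
                                       /*shuffle=*/true, /*num_samples=*/0,
                                       /*seed=*/1);
if (sampler == nullptr) {
  // Rejected by ValidateParams(), e.g. shard_id outside [0, num_shards).
} else {
  std::shared_ptr<Sampler> runtime = sampler->Build();  // runtime-side sampler
}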

File diff suppressed because it is too large

@ -1,10 +1,6 @@
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(core OBJECT
${EXAMPLE_SRCS}
${FEATURE_SRCS}
set(DATASET_CORE_SRC_FILES
client.cc
config_manager.cc
cv_tensor.cc
@ -13,6 +9,13 @@ add_library(core OBJECT
tensor.cc
tensor_row.cc
tensor_shape.cc
)
)
ms_protobuf_generate(EXAMPLE_SRCS EXAMPLE_HDRS example.proto)
ms_protobuf_generate(FEATURE_SRCS FEATURE_HDRS feature.proto)
add_library(core OBJECT ${DATASET_CORE_SRC_FILES} ${EXAMPLE_SRCS} ${FEATURE_SRCS})
add_dependencies(core mindspore::protobuf)
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
if (ENABLE_PYTHON)
target_include_directories(core PRIVATE ${pybind11_INCLUDE_DIRS})
endif()

@ -25,21 +25,25 @@
#include "dataset/core/tensor_shape.h"
#include "dataset/engine/data_schema.h"
#include "dataset/engine/dataset_iterator.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#ifdef ENABLE_PYTHON
#include "dataset/engine/datasetops/barrier_op.h"
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/build_vocab_op.h"
#endif
#include "dataset/engine/datasetops/batch_op.h"
#include "dataset/engine/datasetops/dataset_op.h"
#include "dataset/engine/datasetops/device_queue_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/engine/datasetops/project_op.h"
#include "dataset/engine/datasetops/rename_op.h"
#include "dataset/engine/datasetops/filter_op.h"
#include "dataset/engine/datasetops/repeat_op.h"
#include "dataset/engine/datasetops/skip_op.h"
#include "dataset/engine/datasetops/shuffle_op.h"
#include "dataset/engine/datasetops/source/generator_op.h"
#include "dataset/engine/datasetops/source/mindrecord_op.h"
#include "dataset/engine/datasetops/source/tf_reader_op.h"
#include "dataset/engine/datasetops/take_op.h"
#include "dataset/engine/datasetops/zip_op.h"
#include "dataset/engine/datasetops/concat_op.h"

@ -32,6 +32,12 @@ enum class DatasetType { kUnknown, kArrow, kTf };
// Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
// Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
// Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };
// convenience functions for 32bit int bitmask
inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }
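Note that BitTest() only reports true when every bit of the mask is set, which is stricter than the usual non-zero (bits & mask) test:

uint32_t bits = 0x5;          // binary 0101
bool a = BitTest(bits, 0x1);  // true : bit 0 is set
bool b = BitTest(bits, 0x3);  // false: the mask needs bits 0 and 1, but bit 1 is clear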

@ -14,11 +14,12 @@
* limitations under the License.
*/
#include "dataset/core/data_type.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "utils/log_adapter.h"
#include "dataset/core/pybind_support.h"
namespace mindspore {
namespace dataset {
@ -29,12 +30,14 @@ uint8_t DataType::SizeInBytes() const {
return 0;
}
#ifdef ENABLE_PYTHON
py::dtype DataType::AsNumpyType() const {
if (type_ < DataType::NUM_OF_TYPES)
return py::dtype(kTypeInfo[type_].pybindType_);
else
return py::dtype("unknown");
}
#endif
uint8_t DataType::AsCVType() const {
uint8_t res = kCVInvalidType;
@ -112,6 +115,7 @@ std::string DataType::ToString() const {
return "unknown";
}
#ifdef ENABLE_PYTHON
DataType DataType::FromNpArray(const py::array &arr) {
if (py::isinstance<py::array_t<bool>>(arr)) {
return DataType(DataType::DE_BOOL);
@ -156,6 +160,7 @@ std::string DataType::GetPybindFormat() const {
}
return res;
}
#endif
} // namespace dataset
} // namespace mindspore

@ -19,14 +19,16 @@
#include <opencv2/core/hal/interface.h>
#include <string>
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "dataset/core/constants.h"
#include "dataset/core/pybind_support.h"
namespace py = pybind11;
#else
#include "Eigen/Core"
using float16 = Eigen::half;
#endif
#include "dataset/core/constants.h"
namespace mindspore {
namespace dataset {
@ -59,6 +61,7 @@ class DataType {
const uint8_t cvType_; // OpenCv matching type
};
#ifdef ENABLE_PYTHON
static inline const TypeInfo kTypeInfo[] = {
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
@ -76,19 +79,38 @@ class DataType {
{"float64", 8, "double", py::format_descriptor<double>::format(), CV_64F}, // DE_FLOAT64
{"string", 0, "bytes", "S", kCVInvalidType} // DE_STRING
};
#else
static inline const TypeInfo kTypeInfo[] = {
// name, sizeInBytes, pybindType, formatDescriptor, openCV
{"unknown", 0, "object", "", kCVInvalidType}, // DE_UNKNOWN
{"bool", 1, "bool", "", CV_8U}, // DE_BOOL
{"int8", 1, "int8", "", CV_8S}, // DE_INT8
{"uint8", 1, "uint8", "", CV_8U}, // DE_UINT8
{"int16", 2, "int16", "", CV_16S}, // DE_INT16
{"uint16", 2, "uint16", "", CV_16U}, // DE_UINT16
{"int32", 4, "int32", "", CV_32S}, // DE_INT32
{"uint32", 4, "uint32", "", kCVInvalidType}, // DE_UINT32
{"int64", 8, "int64", "", kCVInvalidType}, // DE_INT64
{"uint64", 8, "uint64", "", kCVInvalidType}, // DE_UINT64
{"float16", 2, "float16", "", CV_16F}, // DE_FLOAT16
{"float32", 4, "float32", "", CV_32F}, // DE_FLOAT32
{"float64", 8, "double", "", CV_64F}, // DE_FLOAT64
{"string", 0, "bytes", "", kCVInvalidType} // DE_STRING
};
#endif
// No arg constructor to create an unknown type
DataType() : type_(DE_UNKNOWN) {}
// Create a type from a given string
// @param type_str
/// \param type_str
explicit DataType(const std::string &type_str);
// Default destructor
~DataType() = default;
// Create a type from a given enum
// @param d
/// \param d
constexpr explicit DataType(Type d) : type_(d) {}
constexpr bool operator==(const DataType a) const { return type_ == a.type_; }
@ -100,49 +122,49 @@ class DataType {
constexpr bool operator!=(const Type a) const { return type_ != a; }
// Disable this usage `if(d)` where d is of type DataType
// @return
/// \return
operator bool() = delete;
// To be used in Switch/case
// @return
/// \return
operator Type() const { return type_; }
// The number of bytes needed to store one value of this type
// @return
/// \return
uint8_t SizeInBytes() const;
// Convert from DataType to OpenCV type
// @return
/// \return
uint8_t AsCVType() const;
// Convert from OpenCV type to DataType
// @param cv_type
// @return
/// \param cv_type
/// \return
static DataType FromCVType(int cv_type);
// Returns a string representation of the type
// @return
/// \return
std::string ToString() const;
// returns true if the template type is the same as the Tensor type_
// @tparam T
// @return true or false
/// \tparam T
/// \return true or false
template <typename T>
bool IsCompatible() const {
return type_ == FromCType<T>();
}
// returns true if the template type is the same as the Tensor type_
// @tparam T
// @return true or false
/// \tparam T
/// \return true or false
template <typename T>
bool IsLooselyCompatible() const;
// << Stream output operator overload
// @notes This allows you to print the info using stream operators
// @param out - reference to the output stream being overloaded
// @param rO - reference to the DataType to display
// @return - the output stream must be returned
/// \note This allows you to print the info using stream operators
/// \param out - reference to the output stream being overloaded
/// \param rO - reference to the DataType to display
/// \return - the output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const DataType &so) {
out << so.ToString();
return out;
@ -151,22 +173,24 @@ class DataType {
template <typename T>
static DataType FromCType();
#ifdef ENABLE_PYTHON
// Convert from DataType to Pybind type
// @return
/// \return
py::dtype AsNumpyType() const;
// Convert from NP type to DataType
// @param type
// @return
/// \param type
/// \return
static DataType FromNpType(const py::dtype &type);
// Convert from NP array to DataType
// @param py array
// @return
/// \param py array
/// \return
static DataType FromNpArray(const py::array &arr);
#endif
// Get the buffer string format of the current type. Used in pybind buffer protocol.
// @return
/// \return
std::string GetPybindFormat() const;
bool IsSignedInt() const {

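With the #else table above, a pure C++ build keeps the size, OpenCV, and string queries and only loses the numpy conversions. A sketch of what remains callable without ENABLE_PYTHON:

DataType t(DataType::DE_FLOAT32);
uint8_t nbytes = t.SizeInBytes();  // 4
uint8_t cv = t.AsCVType();         // CV_32F
std::string name = t.ToString();   // "float32"
// AsNumpyType()/FromNpType()/FromNpArray() compile only under ENABLE_PYTHON.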
@ -28,10 +28,12 @@
#include "dataset/core/constants.h"
#include "dataset/core/cv_tensor.h"
#include "dataset/core/global_context.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
namespace py = pybind11;
#endif
#include "dataset/core/tensor_shape.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {
// Helper macros for printing tensor elements
@ -155,6 +157,7 @@ Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape
MS_ASSERT(num_bytes == 0);
if (shape.known()) Tensor::Reshape(shape);
}
Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
: Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
// total bytes needed = offset array + strings
@ -194,6 +197,7 @@ Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape
MS_ASSERT(num_bytes == 0);
if (shape.known()) Tensor::Reshape(shape);
}
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
DataType type, const unsigned char *data) {
if (!shape.known()) {
@ -223,6 +227,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
return Status::OK(); // returns base-class shared_ptr
}
#ifdef ENABLE_PYTHON
Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
std::vector<dsize_t> shape;
for (dsize_t i = 0; i < arr.ndim(); i++) {
@ -297,6 +302,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
return Status::OK(); // returns base-class shared_ptr
}
#endif
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
const TensorShape &shape) {
@ -698,21 +704,24 @@ std::vector<dsize_t> Tensor::Strides() {
return strides;
}
Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) {
CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
#ifdef ENABLE_PYTHON
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
RETURN_UNEXPECTED_IF_NULL(t);
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
std::string format_desc = t.type().GetPybindFormat();
std::string format_desc = t->type().GetPybindFormat();
if (format_desc.empty()) {
RETURN_STATUS_UNEXPECTED("Cannot convert DE type to pybind format");
}
*out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */
t.type().SizeInBytes(), /* Size of one scalar */
format_desc, /* Python struct-style format descriptor */
t.Rank(), /* Number of dimensions */
t.shape().AsVector(), /* Buffer dimensions */
t.Strides());
*out = py::buffer_info(t->GetMutableBuffer(), /* Pointer to buffer */
t->type().SizeInBytes(), /* Size of one scalar */
format_desc, /* Python struct-style format descriptor */
t->Rank(), /* Number of dimensions */
t->shape().AsVector(), /* Buffer dimensions */
t->Strides());
return Status::OK();
}
#endif
template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
@ -752,6 +761,8 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
o->swap(sv);
return Status::OK();
}
#ifdef ENABLE_PYTHON
// Return data as a numpy array; returns Status
Status Tensor::GetDataAsNumpy(py::array *data) {
RETURN_UNEXPECTED_IF_NULL(data_);
@ -815,6 +826,7 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
return Status::OK();
}
#endif
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }

@ -26,20 +26,27 @@
#undef HAVE_STDDEF_H
#undef HAVE_STDLIB_H
#endif
#ifdef ENABLE_PYTHON
#include "pybind11/numpy.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
#endif
#include "dataset/core/constants.h"
#include "dataset/core/data_type.h"
#include "dataset/core/tensor_shape.h"
#include "dataset/util/allocator.h"
#include "dataset/util/status.h"
#include "proto/example.pb.h"
#ifdef ENABLE_PYTHON
namespace py = pybind11;
#endif
namespace mindspore {
namespace dataset {
class Tensor;
template <typename T>
class Allocator;
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors
@ -114,16 +121,17 @@ class Tensor {
static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
const unsigned char *data = nullptr);
/// Create a copy of the input tensor
/// \param out [out] output tensor to be generated
/// \param in [in] original tensor to be copied
/// \return Status
// Create a copy of the input tensor
// @param out [out] output tensor to be generated
// @param in [in] original tensor to be copied
// @return Status
static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
return Status::OK();
}
#ifdef ENABLE_PYTHON
// A static factory method to create a Tensor from a given py::array.
// @param ptr output argument to hold the created Tensor
// @param arr py::array
@ -132,6 +140,7 @@ class Tensor {
// Helper function to create a tensor from Numpy of strings
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
#endif
// A static factory method to create a Tensor from a given list of strings.
// @param ptr output argument to hold the created Tensor
@ -170,6 +179,7 @@ class Tensor {
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
}
// Create tensor from protobuf bytelist with uint8 or int8 types
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape, const DataType &type, dsize_t pad_size);
@ -346,12 +356,12 @@ class Tensor {
virtual void Squeeze();
/// Calculates the strides of the Tensor
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
/// The strides will be {4,2,1}.
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
/// The strides will be {16,8,4}.
/// @return vector of integers
// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {4,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
// The strides will be {16,8,4}.
// @return vector of integers
std::vector<dsize_t> Strides();
std::string ToString() {
@ -376,6 +386,7 @@ class Tensor {
// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
#ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor
// @param data this data is the location of python data
// @return Status code
@ -383,7 +394,8 @@ class Tensor {
Status GetDataAsNumpyStrings(py::array *data);
static Status GetBufferInfo(Tensor &t, py::buffer_info *out);
static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
@ -570,7 +582,7 @@ class Tensor {
// Return a TensorIterator that points to the start of the Tensor.
// It's the user's responsibility to use the correct type that matches the Tensor type
// @tparam T The type of values in the Tensor
// @param T The type of values in the Tensor
// @return TensorIterator
template <typename T>
TensorIterator<T> begin() {

@ -18,7 +18,6 @@
#include "dataset/core/tensor_row.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {

@ -77,6 +77,7 @@ TensorShape::TensorShape(const TensorShape &shape)
known_ = shape.known_; // override with the input shape in case of unknown-rank tensor shape.
}
#ifdef ENABLE_PYTHON
TensorShape::TensorShape(py::list l)
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
std::vector<dsize_t> list_c;
@ -89,6 +90,7 @@ TensorShape::TensorShape(py::list l)
}
AddListToShape(list_c);
}
#endif
TensorShape::TensorShape(cv::MatSize cv_size, uint32_t type)
: raw_shape_(*GlobalContext::Instance()->int_allocator()), strides_(*GlobalContext::Instance()->int_allocator()) {
@ -197,6 +199,7 @@ TensorShape TensorShape::AppendDim(dsize_t dim) const {
return TensorShape(vec);
}
#ifdef ENABLE_PYTHON
py::list TensorShape::AsPyList() {
py::list list;
for (auto i : raw_shape_) {
@ -204,6 +207,7 @@ py::list TensorShape::AsPyList() {
}
return list;
}
#endif
TensorShape TensorShape::Squeeze() const {
std::vector<dsize_t> new_shape;

@ -24,13 +24,16 @@
#include <opencv2/core/mat.hpp>
#ifdef ENABLE_PYTHON
#include "pybind11/pybind11.h"
namespace py = pybind11;
#endif
#include "dataset/core/constants.h"
#include "dataset/util/status.h"
#include "dataset/core/global_context.h"
#include "dataset/util/allocator.h"
namespace py = pybind11;
namespace mindspore {
namespace dataset {
// Class that represents a shape of a Tensor. A shape can be:
@ -43,7 +46,8 @@ namespace dataset {
// -# one or more dim is unknown --> not empty vector --> <d1, d2, d2, d3, ...> where di is unknown\n
// Example: <3,?> (the 1st dim is unknown)\n
// <2,?,?,?> (all dims but the 0th dim are unknown)
// TensorShape supports any dim > 0 and < 2^31-1
/// \brief TensorShape supports any dim > 0 and < 2^31-1
class TensorShape {
public:
static constexpr dsize_t kDimUnknown = -1; // constant for an unknown dimension
@ -51,57 +55,59 @@ class TensorShape {
// Force the compiler to not create a no-arg constructor
TensorShape() = delete;
// Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
// If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown
// @param list
/// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
/// If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown
/// \param[in] list
explicit TensorShape(const std::initializer_list<dsize_t> &list);
// Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
// If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown
// @param list
/// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
/// If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown
/// \param[in] list
explicit TensorShape(const std::vector<dsize_t> &list);
// Copy constructor
// @param shape
/// \brief Copy constructor
/// \param[in] shape
TensorShape(const TensorShape &shape);
// construct a TensorShape via a python list
// @param py::list l - a list object from python
#ifdef ENABLE_PYTHON
/// \brief construct a TensorShape via a python list
/// \param[in] py::list l - a list object from python
explicit TensorShape(py::list l);
#endif
~TensorShape() = default;
// Create a scalar Shape (i.e., empty shape with known_ = true)
// @return TensorShape
/// \brief Create a scalar Shape (i.e., empty shape with known_ = true)
/// \return TensorShape
static TensorShape CreateScalar() { return TensorShape({}); }
// Create a shape with an unknown rank.
// @return TensorShape
/// \brief Create a shape with an unknown rank.
/// \return TensorShape
static TensorShape CreateUnknownRankShape();
// Create a shape with a known rank.
// @return TensorShape
/// \brief Create a shape with a known rank.
/// \return TensorShape
static TensorShape CreateUnknownShapeWithRank(dsize_t rank);
// Insert a new dim into a copy of the current shape.
// @param dim to be added
// @param axis the index where dim should be added
// @return New modified shape
/// \brief Insert a new dim into a copy of the current shape.
/// \param[in] dim to be added
/// \param[in] axis the index where dim should be added
/// \return New modified shape
TensorShape InsertDim(dsize_t axis, dsize_t dim) const;
// Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
// @param dim
// @return
/// \brief Insert new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
/// \param[in] dim
/// \return
TensorShape PrependDim(dsize_t dim) const;
// Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
// @param dim
// @return
/// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
/// \param[in] dim
/// \return
TensorShape AppendDim(dsize_t dim) const;
// Create a shape based on OpenCV shape and type
// @param cv_size
// @param type int that represents the type in OpenCV, e.g. CV_8U, CV_64S
/// \brief Create a shape based on OpenCV shape and type
/// \param[in] cv_size
/// \param[in] type int that represents the type in OpenCV, e.g. CV_8U, CV_64S
TensorShape(cv::MatSize cv_size, uint32_t type);
dsize_t Size() const { return raw_shape_.size(); }
@ -123,47 +129,50 @@ class TensorShape {
return raw_shape_[index];
}
// Return the Shape as a vector
// @return
/// \brief Return the Shape as a vector
/// \return
std::vector<dsize_t> AsVector() const;
// Returns the class info as a string
// @return
/// \brief Returns the class info as a string
/// \return
std::string ToString() const {
std::stringstream ss;
ss << *this;
return ss.str();
}
// Actual print function used by operator<<
// @param out output string stream
/// \brief Actual print function used by operator<<
/// \param out output string stream
void Print(std::ostream &out) const;
// << Stream output operator overload
// @notes This allows you to print the info using stream operators
// @param out - reference to the output stream being overloaded
// @param rO - reference to the TensorShape to display
// @return - the output stream must be returned
/// \brief << Stream output operator overload
/// This allows you to print the info using stream operators
/// \param[in] out - reference to the output stream being overloaded
/// \param[in] rO - reference to the TensorShape to display
/// \return - the output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) {
so.Print(out);
return out;
}
#ifdef ENABLE_PYTHON
py::list AsPyList();
#endif
// Checks if the given index is a valid index for this tensor.
// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
// @param index
// @return bool
/// \brief Checks if the given index is a valid index for this tensor.
/// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
/// \param[in] index
/// \return bool
bool IsValidIndex(const std::vector<dsize_t> &index) const;
TensorShape Squeeze() const;
std::vector<dsize_t> Strides() const;
// Returns the location of the item assuming row major memory layout.
// @param index
// @return
/// \brief Returns the location of the item assuming row major memory layout.
/// \param[in] index
/// \param[out] flat_index
/// \return
Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
private:
@ -174,11 +183,11 @@ class TensorShape {
// Vector to keep the strides of the shape. The size is rank+1
std::vector<dsize_t, IntAlloc> strides_;
// Internal utility function to iterate over a list, check if the dim is valid and then insert it into the shape.
// @tparam T list
// @param list Iterable list
// @return true if the shape is valid and no overflow would be generated when counting the number of elements.
// False otherwise.
/// \brief Internal utility function to iterate over a list,
/// check if the dim is valid and then insert it into the shape.
/// \param[in] list Iterable list
/// \return true if the shape is valid and no overflow would be generated when counting the number of elements.
/// False otherwise.
template <typename T>
void AddListToShape(const T &list);
};
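A compact sketch tying the helpers documented above together (the values follow the examples in the comments):

TensorShape s({2, 4});                     // known shape <2,4>
TensorShape p = s.PrependDim(4);           // <4,2,4>
TensorShape a = s.AppendDim(4);            // <2,4,4>
bool ok = s.IsValidIndex({1, 3});          // true: inside <2,4>
dsize_t flat;
Status rc = s.ToFlatIndex({1, 3}, &flat);  // row-major, so flat == 1*4 + 3 == 7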

@ -2,13 +2,12 @@ add_subdirectory(source)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops OBJECT
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
dataset_op.cc
parallel_op.cc
pipeline_op.cc
barrier_op.cc
batch_op.cc
bucket_batch_by_length_op.cc
device_queue_op.cc
map_op.cc
project_op.cc
@ -18,8 +17,18 @@ add_library(engine-datasetops OBJECT
take_op.cc
shuffle_op.cc
zip_op.cc
concat_op.cc
filter_op.cc
build_vocab_op.cc
concat_op.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SRC_FILES}
bucket_batch_by_length_op.cc
barrier_op.cc
filter_op.cc
build_vocab_op.cc
)
endif()
add_library(engine-datasetops OBJECT ${DATASET_ENGINE_DATASETOPS_SRC_FILES})

@ -19,7 +19,9 @@
#include <iomanip>
#include "common/utils.h"
#ifdef ENABLE_PYTHON
#include "dataset/core/pybind_support.h"
#endif
#include "dataset/engine/data_buffer.h"
#include "dataset/engine/db_connector.h"
#include "dataset/engine/opt/pass.h"
@ -38,9 +40,14 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
#ifdef ENABLE_PYTHON
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_,
builder_batch_map_func_, builder_pad_map_);
#else
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
builder_num_workers_, builder_cols_to_map_, builder_pad_map_);
#endif
return Status::OK();
}
@ -52,6 +59,7 @@ Status BatchOp::Builder::SanityCheck() {
return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err));
}
#ifdef ENABLE_PYTHON
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func,
PadInfo pad_map)
@ -65,6 +73,18 @@ BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size,
pad_info_(pad_map) {
worker_queues_.Init(num_workers, op_queue_size);
}
#else
BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &cols_to_map, PadInfo pad_map)
: ParallelOp(num_workers, op_queue_size),
start_batch_size_(batch_size),
drop_(drop),
pad_(pad),
pyfunc_column_names_(cols_to_map),
pad_info_(pad_map) {
worker_queues_.Init(num_workers, op_queue_size);
}
#endif
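For reference, a hedged sketch of constructing the non-Python BatchOp directly (values are illustrative, PadInfo is assumed default-constructible, and the Builder above is the normal entry point):

PadInfo pad_map;  // assumption: an empty PadInfo means no padding
auto op = std::make_shared<BatchOp>(/*batch_size=*/32, /*drop=*/true, /*pad=*/false,
                                    /*op_queue_size=*/16, /*num_workers=*/4,
                                    std::vector<std::string>{}, pad_map);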
Status BatchOp::operator()() {
Status rc = LaunchThreadsAndInitOp();
@ -206,7 +226,9 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
std::unique_ptr<DataBuffer> *db) {
RETURN_UNEXPECTED_IF_NULL(table_pair.first);
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
#ifdef ENABLE_PYTHON
if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
#endif
if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair.first, pad_info_, column_name_id_map_)); // do padding if needed
(*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone);
std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>();
@ -229,6 +251,7 @@ Status BatchOp::EoeReceived(int32_t) {
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
TensorBatchTable input_table;
input_table.reserve(pyfunc_column_names_.size());
@ -259,16 +282,22 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
}
return Status::OK();
}
#endif
Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
#ifdef ENABLE_PYTHON
if (batch_size_func_ != nullptr) {
RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));
} else {
(*batch_size) = start_batch_size_;
}
#else
(*batch_size) = start_batch_size_;
#endif
return Status::OK();
}
#ifdef ENABLE_PYTHON
Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
{
// Acquire Python GIL
@ -336,6 +365,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
}
return Status(StatusCode::kOK);
}
#endif
Status BatchOp::PadColumns(std::unique_ptr<TensorQTable> *table, const PadInfo &pad_info,
const std::unordered_map<std::string, int32_t> &column_name_id_map) {

@ -89,6 +89,7 @@ class BatchOp : public ParallelOp {
return *this;
}
#ifdef ENABLE_PYTHON
// set columns to perform map on
// @param const std::vector<std::string> & cols_to_map - name of columns to perform map on
// @return Builder & reference to builder class object
@ -104,6 +105,7 @@ class BatchOp : public ParallelOp {
builder_batch_size_func_ = batch_size_func;
return *this;
}
#endif
// @param std::shared_ptr<BatchOp> *ptr pointer to shared_ptr, actual return arg
// @return Status - The error code return
@ -121,8 +123,10 @@ class BatchOp : public ParallelOp {
int32_t builder_op_connector_size_;
std::vector<std::string> builder_cols_to_map_;
PadInfo builder_pad_map_;
#ifdef ENABLE_PYTHON
py::function builder_batch_size_func_;
py::function builder_batch_map_func_;
#endif
};
enum batchCtrl : int8_t { kNoCtrl = 0, kEOE = 1, kEOF = 2, kQuit = 3 };
@ -144,6 +148,7 @@ class BatchOp : public ParallelOp {
const int64_t get_epoch_num() const { return epoch_num_; }
};
#ifdef ENABLE_PYTHON
// BatchOp constructor
// @param int32_t batch_size
// @param bool drop
@ -152,6 +157,10 @@ class BatchOp : public ParallelOp {
// @param int32_t num_workers
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func, PadInfo pad_map);
#else
BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
const std::vector<std::string> &, PadInfo pad_map);
#endif
// BatchOp destructor
~BatchOp() {}
@ -219,10 +228,13 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
std::unique_ptr<DataBuffer> *db);
#ifdef ENABLE_PYTHON
// Function that calls pyfunc to perform map on batch
// @param std::pair<std::unique_ptr<TensorQTable>, batch_stats> *table_pair - contains un-batched tensors
// @return Status - The error code return
Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair);
#endif
// @param const PadInfo &pad_info pad info to unpack
// @param const std::unordered_map<std::string, int32_t>& column_name_id_map - column names to index mapping
@ -247,6 +259,7 @@ class BatchOp : public ParallelOp {
// @return Status - The error code return
Status LaunchThreadsAndInitOp();
#ifdef ENABLE_PYTHON
// Invoke batch size function with current BatchInfo to generate batch size.
// @return Status - The error code return
Status InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info);
@ -254,6 +267,7 @@ class BatchOp : public ParallelOp {
// Invoke batch map function with current BatchInfo to generate tensors to batch.
// @return Status - The error code return
Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info);
#endif
int32_t start_batch_size_;
bool drop_; // bool for whether to drop remainder or not
@ -262,8 +276,10 @@ class BatchOp : public ParallelOp {
PadInfo pad_info_; // column names to perform padding on
std::unique_ptr<ChildIterator> child_iterator_; // child iterator for fetching TensorRows 1 by 1
QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_; // internal queue for syncing worker
#ifdef ENABLE_PYTHON
py::function batch_size_func_; // Function pointer of batch size function
py::function batch_map_func_; // Function pointer of per batch map function
#endif
};
} // namespace dataset
} // namespace mindspore

@ -1,19 +1,32 @@
add_subdirectory(sampler)
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops-source OBJECT
generator_op.cc
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
io_block.cc
mindrecord_op.cc
tf_reader_op.cc
image_folder_op.cc
mnist_op.cc
voc_op.cc
coco_op.cc
manifest_op.cc
cifar_op.cc
random_data_op.cc
celeba_op.cc
text_file_op.cc
clue_op.cc
)
)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
mindrecord_op.cc
tf_reader_op.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
generator_op.cc
voc_op.cc
manifest_op.cc
)
endif()
add_library(engine-datasetops-source OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES})

@ -1,12 +1,21 @@
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
add_library(engine-datasetops-source-sampler OBJECT
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
distributed_sampler.cc
pk_sampler.cc
python_sampler.cc
random_sampler.cc
sampler.cc
sequential_sampler.cc
subset_random_sampler.cc
weighted_random_sampler.cc
)
if (ENABLE_PYTHON)
set(DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}
python_sampler.cc
)
endif()
add_library(engine-datasetops-source-sampler OBJECT ${DATASET_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES})

Some files were not shown because too many files have changed in this diff
