!3024 Decode + RandomCropAndResize fusion within MapOp

Merge pull request !3024 from Alexey_Shevlyakov/random_crop_decode_resize_fusion
pull/3024/MERGE
mindspore-ci-bot 5 years ago committed by Gitee
commit 530d46eb47

@ -27,6 +27,7 @@
namespace mindspore {
namespace dataset {
// Forward declare
class ExecutionTree;

@ -181,6 +181,13 @@ class MapOp : public ParallelOp {
// @return Name of the current Op
std::string Name() const override { return "MapOp"; }
// List of tensor ops getter/setter
// @Return the vector of tensor ops by non-const reference, allowing
//     optimization passes (e.g. TensorOpFusionPass) to rewrite the op list in place
auto &TFuncs() { return tfuncs_; }
// @Return the vector of tensor ops by const reference (read-only access)
const auto &TFuncs() const { return tfuncs_; }
private:
// Local queues where worker threads can pop from.
// Popping directly from the Connector can block if the previous designated threads haven't pop.
@ -188,7 +195,7 @@ class MapOp : public ParallelOp {
QueueList<std::unique_ptr<DataBuffer>> local_queues_;
// Static variables to be read by worker threads; read-only, no modification
const std::vector<std::shared_ptr<TensorOp>> tfuncs_;
std::vector<std::shared_ptr<TensorOp>> tfuncs_;
// Variable to store the column name that the tensorOps are consuming
std::vector<std::string> in_columns_;

@ -23,6 +23,7 @@
#include "dataset/engine/opt/pre/removal_pass.h"
#include "dataset/engine/opt/pre/cache_transform_pass.h"
#include "dataset/engine/opt/post/repeat_pass.h"
#include "mindspore/ccsrc/dataset/engine/opt/optional/tensor_op_fusion_pass.h"
#include "dataset/engine/perf/profiling.h"
#include "dataset/engine/perf/monitor.h"
@ -35,6 +36,7 @@ ExecutionTree::ExecutionTree() : id_count_(0) {
prepare_flags_ = kDePrepNone;
perf_monitor_ = std::make_unique<Monitor>(this);
profiling_manager_ = std::make_unique<ProfilingManager>(this);
optimize_ = common::GetEnv("OPTIMIZE") == "true" ? true : false;
}
// Destructor
@ -202,8 +204,10 @@ Status ExecutionTree::Prepare() {
// Pre optimization compulsory transformation
RETURN_IF_NOT_OK(this->PrepareTreePreAction());
// Optimization transformation
RETURN_IF_NOT_OK(this->Optimize());
// If optional optimizations are enabled
if (optimize_) {
RETURN_IF_NOT_OK(this->Optimize());
}
// Post optimization compulsory transformation
RETURN_IF_NOT_OK(this->PrepareTreePostAction());
@ -248,9 +252,16 @@ Status ExecutionTree::PrepareTreePostAction() {
}
// Runs the optional optimization passes over the tree.
// Each pass is executed once, in order; a pass failure aborts the sequence.
// @return Status - The error code return
Status ExecutionTree::Optimize() {
  // Vector of optimizations, currently only 1, add more as necessary
  std::vector<std::unique_ptr<NodePass>> optimizations;
  optimizations.push_back(std::make_unique<TensorOpFusionPass>());
  // vector of flags recording, per pass, whether the tree was modified
  std::vector<bool> modified(optimizations.size(), false);
  // size_t index avoids a signed/unsigned comparison against size()
  for (size_t i = 0; i < optimizations.size(); i++) {
    bool m = false;
    // Propagate pass failures instead of silently discarding the Status
    RETURN_IF_NOT_OK(optimizations[i]->Run(this, &m));
    modified[i] = m;
  }
  return Status::OK();
}

@ -87,6 +87,8 @@ class ExecutionTree {
// @return Shared pointer to the current operator
std::shared_ptr<DatasetOp> get() { return nodes_[ind_]; }
// NOTE(review): these comparison operators are not const-qualified, so they
// cannot be selected for a const Iterator; see the free operator== at
// namespace scope, which must account for this.
bool operator==(const Iterator &rhs) { return nodes_[ind_] == rhs.nodes_[rhs.ind_]; }
bool operator!=(const Iterator &rhs) { return nodes_[ind_] != rhs.nodes_[rhs.ind_]; }
// @return Number of nodes currently held by this iterator's traversal list
int32_t NumNodes() { return nodes_.size(); }
@ -214,6 +216,21 @@ class ExecutionTree {
// Getter for profiling manager, no ownership
ProfilingManager *GetProfilingManager() { return profiling_manager_.get(); }
// Set optional optimization flag; only permitted before the tree is prepared.
// @param value - Desired state of the optional optimization pass
// @return Status - error if the tree has already moved past the build stage
Status SetOptimize(bool value) {
  // Guard clause: once prepared, the flag is frozen and changing it is an error
  if (tree_state_ != kDeTStateInit && tree_state_ != kDeTStateBuilding) {
    std::string msg =
      "Tree has already been prepared with OPTIMIZE set to " + std::string(optimize_ ? "true" : "false");
    RETURN_STATUS_UNEXPECTED(msg);
  }
  optimize_ = value;
  return Status::OK();
}
// Optional optimizations status
// @return true when the optional optimization passes will run during Prepare()
bool OptimizationEnabled() const { return optimize_; }
private:
// A helper functions for doing the recursive printing
// @param dataset_op - The dataset op to print
@ -230,7 +247,10 @@ class ExecutionTree {
TreeState tree_state_; // Tracking the current tree state
std::unique_ptr<Monitor> perf_monitor_; // Performance Monitor
std::unique_ptr<ProfilingManager> profiling_manager_; // Profiling manager
bool optimize_; // Flag to enable optional optimizations
};
// Equality for const iterators. The member operator==() is not const-qualified,
// so writing `lhs == rhs` here would re-select this free function and recurse
// forever (stack overflow). Delegate explicitly to the member instead; the
// const_cast is safe because the member only reads nodes_/ind_.
inline bool operator==(const ExecutionTree::Iterator &lhs, const ExecutionTree::Iterator &rhs) {
  return const_cast<ExecutionTree::Iterator &>(lhs).operator==(rhs);
}
} // namespace dataset
} // namespace mindspore

@ -7,5 +7,6 @@ add_library(engine-opt OBJECT
pre/cache_transform_pass.cc
pre/removal_nodes.cc
pre/removal_pass.cc
optional/tensor_op_fusion_pass.cc
util/printer_pass.cc
)

@ -0,0 +1,58 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "dataset/engine/opt/optional/tensor_op_fusion_pass.h"
#include "dataset/kernels/image/decode_op.h"
#include "dataset/engine/datasetops/map_op.h"
#include "dataset/kernels/image/random_crop_decode_resize_op.h"
namespace mindspore {
namespace dataset {
// Fuses a DecodeOp immediately followed by a RandomCropAndResizeOp inside a MapOp
// into a single RandomCropDecodeResizeOp.
// @param node - The MapOp being visited
// @param modified - out: set to true only when a fusion actually occurred
// @return Status - The error code return
Status TensorOpFusionPass::RunOnNode(std::shared_ptr<MapOp> node, bool *modified) {
  // Validate the out-parameter up front, before any work is done
  if (modified == nullptr) {
    RETURN_STATUS_UNEXPECTED("modified is nullptr");
  }
  // Most primitive pattern: DecodeOp immediately followed by RandomCropAndResizeOp.
  // TODO: abstract into a more general member function that can find any pattern,
  // expressed by regular expressions for instance, and a list of policies.
  // Returns an iterator to the DecodeOp when the pattern matches, end() otherwise.
  auto FindPattern = [](auto &tfuncs) {
    auto it =
      std::find_if(tfuncs.begin(), tfuncs.end(), [](const auto &tf) -> bool { return tf->Name() == kDecodeOp; });
    // Bail out before computing it + 1: advancing the past-the-end iterator is UB
    if (it == tfuncs.end()) {
      return tfuncs.end();
    }
    auto next = it + 1;
    if (next != tfuncs.end() && (*next)->Name() == kRandomCropAndResizeOp) {
      return it;
    }
    return tfuncs.end();
  };
  auto &tfuncs = node->TFuncs();
  auto it = FindPattern(tfuncs);
  if (it != tfuncs.end()) {
    auto next = it + 1;
    // Overwrite the DecodeOp slot with the fused op, then drop the redundant
    // RandomCropAndResizeOp entry
    auto op = static_cast<RandomCropAndResizeOp *>(next->get());
    *it = std::static_pointer_cast<TensorOp>(std::make_shared<RandomCropDecodeResizeOp>(*op));
    tfuncs.erase(next);
    *modified = true;
  } else {
    // Report accurately: the original code flagged the tree as modified even
    // when no fusion took place
    *modified = false;
  }
  return Status::OK();
}
} // namespace dataset
} // namespace mindspore

@ -0,0 +1,38 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef DATASET_TENSOR_OP_FUSION_PASS_H_
#define DATASET_TENSOR_OP_FUSION_PASS_H_
#include <memory>
#include "dataset/engine/opt/pass.h"
namespace mindspore {
namespace dataset {
/// \class TensorOpFusionPass tensor_op_fusion_pass.h
/// \brief An optional optimization pass identifying and fusing
///     tensor ops within MapOp
class TensorOpFusionPass : public NodePass {
  /// \brief Identifies and fuses tensor ops within MapOp
  /// \param[in] node The MapOp node being visited
  /// \param[inout] *modified flags whether the pass changed the node's tensor ops
  /// \return Status The error code return
  Status RunOnNode(std::shared_ptr<MapOp> node, bool *modified) override;
};
} // namespace dataset
} // namespace mindspore
#endif // DATASET_TENSOR_OP_FUSION_PASS_H_

@ -55,6 +55,8 @@ class ConcatenateOp : public TensorOp {
/// Number of inputs the tensor operation accepts
uint32_t NumInput() override { return 0; }
std::string Name() const override { return kConcatenateOp; }
private:
int8_t axis_;
std::shared_ptr<Tensor> prepend_;

@ -127,7 +127,7 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output
std::shared_ptr<Tensor> out, fill_output;
if (input_type != DataType::DE_STRING && fill_type != DataType::DE_STRING && input_type != fill_type) {
std::unique_ptr<TypeCastOp> op(new TypeCastOp(input_type));
auto op = std::make_unique<TypeCastOp>(input_type);
RETURN_IF_NOT_OK(op->Compute(fill_value, &fill_output));
} else {
fill_output = fill_value;

@ -18,6 +18,7 @@
#include <vector>
#include <memory>
#include <string>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -36,6 +37,8 @@ class DuplicateOp : public TensorOp {
Status Compute(const TensorRow &input, TensorRow *output) override;
uint32_t NumOutput() override { return 2; }
std::string Name() const override { return kDuplicateOp; }
};
} // namespace dataset
} // namespace mindspore

@ -35,6 +35,8 @@ class FillOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
std::string Name() const override { return kFillOp; }
private:
std::shared_ptr<Tensor> fill_value_;
};

@ -43,6 +43,8 @@ class MaskOp : public TensorOp {
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
std::string Name() const override { return kMaskOp; }
private:
RelationalOp op_;
std::shared_ptr<Tensor> value_;

@ -37,6 +37,8 @@ class OneHotOp : public TensorOp {
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
std::string Name() const override { return kOneHotOp; }
private:
int num_classes_;
};

@ -38,6 +38,8 @@ class PadEndOp : public TensorOp {
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
std::string Name() const override { return kPadEndOp; }
private:
TensorShape output_shape_;
std::shared_ptr<Tensor> pad_val_;

@ -71,6 +71,8 @@ class SliceOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
std::string Name() const override { return kSliceOp; }
private:
// only one of the following will be valid
// given indices to slice the Tensor. Empty vector if invalid.

@ -42,6 +42,8 @@ class ToFloat16Op : public TensorOp {
void Print(std::ostream &out) const override { out << "ToFloat16Op"; }
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
std::string Name() const override { return kToFloat16Op; }
};
} // namespace dataset
} // namespace mindspore

@ -42,6 +42,8 @@ class TypeCastOp : public TensorOp {
void Print(std::ostream &out) const override { out << "TypeCastOp"; }
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
std::string Name() const override { return kTypeCastOp; }
private:
DataType type_;
};

@ -20,6 +20,7 @@
#include <memory>
#include <random>
#include <cstdlib>
#include <string>
#include <opencv2/imgproc/imgproc.hpp>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -50,6 +51,8 @@ class BoundingBoxAugmentOp : public TensorOp {
Status Compute(const TensorRow &input, TensorRow *output) override;
std::string Name() const override { return kBoundingBoxAugmentOp; }
private:
float ratio_;
std::mt19937 rnd_;

@ -18,6 +18,7 @@
#include <memory>
#include <vector>
#include <string>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -39,6 +40,8 @@ class CenterCropOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
std::string Name() const override { return kCenterCropOp; }
private:
int32_t crop_het_;
int32_t crop_wid_;

@ -61,6 +61,8 @@ class CutOutOp : public TensorOp {
// @return Status - The error code return
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
std::string Name() const override { return kCutOutOp; }
private:
std::mt19937 rnd_;
int32_t box_height_;

@ -18,6 +18,7 @@
#include <memory>
#include <vector>
#include <string>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -40,6 +41,8 @@ class DecodeOp : public TensorOp {
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
Status OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) override;
std::string Name() const override { return kDecodeOp; }
private:
bool is_rgb_format_ = true;
};

@ -18,6 +18,7 @@
#include <memory>
#include <vector>
#include <string>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -31,6 +32,8 @@ class HwcToChwOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
std::string Name() const override { return kHwcToChwOp; }
};
} // namespace dataset
} // namespace mindspore

@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
const int buffer_size = output_tensor->SizeInBytes();
JSAMPLE *buffer = static_cast<JSAMPLE *>(reinterpret_cast<uchar *>(&(*output_tensor->begin<uint8_t>())));
JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
const int max_scanlines_to_read = skipped_scanlines + crop_h;
// stride refers to output tensor, which has 3 components at most
const int stride = crop_w * kOutNumComponents;

@ -17,6 +17,7 @@
#define DATASET_KERNELS_IMAGE_NORMALIZE_OP_H_
#include <memory>
#include <string>
#include "dataset/core/cv_tensor.h"
#include "dataset/core/tensor.h"
@ -35,6 +36,8 @@ class NormalizeOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
std::string Name() const override { return kNormalizeOp; }
private:
std::shared_ptr<CVTensor> mean_;
std::shared_ptr<CVTensor> std_;

@ -18,6 +18,7 @@
#include <memory>
#include <vector>
#include <string>
#include "dataset/core/tensor.h"
#include "dataset/kernels/tensor_op.h"
@ -53,6 +54,8 @@ class PadOp : public TensorOp {
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
std::string Name() const override { return kPadOp; }
private:
int32_t pad_top_;
int32_t pad_bottom_;

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save