diff --git a/mindspore/ccsrc/dataset/api/de_pipeline.cc b/mindspore/ccsrc/dataset/api/de_pipeline.cc
index be133ea7a9..4a5dac198f 100644
--- a/mindspore/ccsrc/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/dataset/api/de_pipeline.cc
@@ -207,6 +207,8 @@ int DEPipeline::GetBatchSize() const { return batch_size_; }
 
 int DEPipeline::GetRepeatCount() const { return repeat_num_; }
 
+float ToFloat(const py::handle &handle) { return py::reinterpret_borrow<py::float_>(handle); }
+
 int ToInt(const py::handle &handle) { return py::reinterpret_borrow<py::int_>(handle); }
 
 bool ToBool(const py::handle &handle) { return py::reinterpret_borrow<py::bool_>(handle); }
@@ -621,6 +623,21 @@ Status DEPipeline::ParseBatchOp(const py::dict &args, std::shared_ptr<DatasetOp>
       if (key == "input_columns") {
         (void)builder->SetColumnsToMap(ToStringVector(value));
       }
+      if (key == "pad_info") {
+        std::map<std::string, std::pair<TensorShape, float>> pad_info;
+        for (auto p : py::reinterpret_borrow<py::dict>(value)) {
+          if (!p.second.is_none()) {
+            py::tuple tp = py::reinterpret_borrow<py::tuple>(p.second);
+            CHECK_FAIL_RETURN_UNEXPECTED(tp.size() == 2, "tuple in pad_info must be (list,int) or (list,float)");
+            TensorShape shape = tp[0].is_none() ? TensorShape::CreateUnknownRankShape() : TensorShape(tp[0]);
+            float pad_val = tp[1].is_none() ? 0 : ToFloat(tp[1]);
+            (void)pad_info.insert({ToString(p.first), {shape, pad_val}});
+          } else {  // tuple is None
+            (void)pad_info.insert({ToString(p.first), {TensorShape({}), 0}});
+          }
+        }
+        (void)builder->SetPaddingMap(pad_info, true);
+      }
     }
   }
 
diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/dataset/core/tensor.h
index 74da40c293..4a41d4bd20 100644
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
@@ -93,10 +93,10 @@ class Tensor {
 
   // Copy raw data of a array based on shape and strides to the destination pointer
   // @param dst Pointer to the destination array where the content is to be copied
-  // @param src Pointer to the source of stided array to be copied
+  // @param src Pointer to the source of strided array to be copied
   // @param shape - shape of the source array
   // @param strides - strides of the source array
-  // @param type_size - number of bytes needed to store one array elment's type
+  // @param type_size - number of bytes needed to store one array element's type
   // @return Status Code
   static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                  std::vector<dsize_t> strides, uint8_t type_size);
@@ -138,10 +138,10 @@ class Tensor {
     return Status::OK();
   }
 
+  // fill tensor with Zeros
   Status Zero() {
     dsize_t size = SizeInBytes();
-    int retCode = memset_sp(StartAddr(), size, 0, size);
-    if (retCode != 0) return Status(StatusCode::kUnexpectedError, "Failed to fill tensor with zeroes.");
+    CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(StartAddr(), size, 0, size) == 0, "Failed to fill tensor with zeroes.");
     return Status::OK();
   }
 
@@ -154,10 +154,7 @@ class Tensor {
     int64_t cellSize = type_.SizeInBytes();
     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
       for (dsize_t i = 0; i < Size(); i++) {
-        int retCode = memcpy_s((data_ + i * cellSize), cellSize, &value, cellSize);
-        if (retCode != 0) {
-          return Status(StatusCode::kUnexpectedError, "Failed to fill tensor.");
-        }
+        CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s((data_ + i * cellSize), cellSize, &value, cellSize) == 0, "memcpy err");
       }
       return Status::OK();
     } else {
diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.cc b/mindspore/ccsrc/dataset/core/tensor_shape.cc
index 24520dc381..3a6514034f 100644
--- a/mindspore/ccsrc/dataset/core/tensor_shape.cc
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.cc
@@ -87,8 +87,12 @@ TensorShape::TensorShape(const TensorShape &shape) : raw_shape_(*GlobalContext::
 
 TensorShape::TensorShape(py::list l) : raw_shape_(*GlobalContext::Instance()->int_allocator()) {
   std::vector<dsize_t> list_c;
-  for (auto i : l) {
-    list_c.push_back(i.cast<int>());
+  for (auto &i : l) {
+    if (!i.is_none()) {
+      list_c.push_back(i.cast<int>());
+    } else {
+      list_c.push_back(TensorShape::kDimUnknown);
+    }
   }
   AddListToShape(list_c);
 }
diff --git a/mindspore/ccsrc/dataset/core/tensor_shape.h b/mindspore/ccsrc/dataset/core/tensor_shape.h
index f908a00ecc..230b36cda2 100644
--- a/mindspore/ccsrc/dataset/core/tensor_shape.h
+++ b/mindspore/ccsrc/dataset/core/tensor_shape.h
@@ -65,6 +65,10 @@ class TensorShape {
   // @param shape
   TensorShape(const TensorShape &shape);
 
+  // construct a TensorShape via a python list
+  // @param py::list l - a list object from python
+  explicit TensorShape(py::list l);
+
   ~TensorShape() = default;
 
   // Create a scalar Shape (i.e., empty shape with mKnown = true)
@@ -142,8 +146,6 @@ class TensorShape {
     return out;
   }
 
-  explicit TensorShape(py::list l);
-
   py::list AsPyList();
 
   // Checks if the given index is a valid index for this tensor.
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
index a86633e5b4..018ff99e52 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
+++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.cc
@@ -14,15 +14,20 @@
  * limitations under the License.
  */
 #include "dataset/engine/datasetops/batch_op.h"
+
 #include <utility>
 #include <iomanip>
+
 #include "common/utils.h"
+#include "dataset/core/pybind_support.h"
 #include "dataset/engine/data_buffer.h"
 #include "dataset/engine/db_connector.h"
 
+using float16 = Eigen::half;
+
 namespace mindspore {
 namespace dataset {
-BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false) {
+BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pad_(false), builder_pad_map_({}) {
   builder_batch_size_ = batch_size;
   std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
   builder_num_workers_ = cfg->num_parallel_workers();
@@ -31,8 +36,9 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false) {
 
 Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
   RETURN_IF_NOT_OK(SanityCheck());
-  *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_op_connector_size_, builder_num_workers_,
-                                   builder_cols_to_map_, builder_batch_size_func_, builder_batch_map_func_);
+  *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
+                                   builder_num_workers_, builder_cols_to_map_, builder_batch_size_func_,
+                                   builder_batch_map_func_, builder_pad_map_);
   return Status::OK();
 }
 
@@ -44,14 +50,17 @@ Status BatchOp::Builder::SanityCheck() {
   return err.empty() ? Status::OK() : Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, common::SafeCStr(err));
 }
 
-BatchOp::BatchOp(int32_t batch_size, bool drop, int32_t op_queue_size, int32_t num_workers,
-                 const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func)
+BatchOp::BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
+                 const std::vector<std::string> &cols_to_map, py::function batch_size_func, py::function batch_map_func,
+                 std::map<std::string, std::pair<TensorShape, float>> pad_map)
     : ParallelOp(num_workers, op_queue_size),
       start_batch_size_(batch_size),
       drop_(drop),
-      input_column_names_(cols_to_map),
+      pad_(pad),
+      pyfunc_column_names_(cols_to_map),
       batch_size_func_(batch_size_func),
-      batch_map_func_(batch_map_func) {
+      batch_map_func_(batch_map_func),
+      pad_info_(pad_map) {
   worker_queues_.Init(num_workers, op_queue_size);
 }
 
@@ -181,7 +190,8 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
 Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair,
                                   std::unique_ptr<DataBuffer> *db) {
   RETURN_UNEXPECTED_IF_NULL(table_pair.first);
-  if (!input_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair));  // pass it through pyfunc
+  if (!pyfunc_column_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair));  // pass it through pyfunc
+  if (pad_) RETURN_IF_NOT_OK(PadColumns(&table_pair));                           // do padding if needed
   (*db) = std::make_unique<DataBuffer>(table_pair.second.batch_num_, DataBuffer::kDeBFlagNone);
   std::unique_ptr<TensorQTable> dest_table = std::make_unique<TensorQTable>();
   RETURN_IF_NOT_OK(BatchRows(&table_pair.first, &dest_table, table_pair.first->size()));
@@ -206,8 +216,8 @@ Status BatchOp::EoeReceived(int32_t) {
 
 Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
   TensorBatchTable input_table;
-  input_table.reserve(input_column_names_.size());
-  for (std::string col_name : input_column_names_) {
+  input_table.reserve(pyfunc_column_names_.size());
+  for (std::string col_name : pyfunc_column_names_) {
     if (column_name_map_.find(col_name) == column_name_map_.end()) {
       RETURN_STATUS_UNEXPECTED("column : '" + col_name + "' does not exist\n");
     }
@@ -225,8 +235,8 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
   RETURN_IF_NOT_OK(InvokeBatchMapFunc(&input_table, &output_table, table_pair->second));
 
   // Write back to TensorQTable
-  for (size_t input_idx = 0; input_idx < input_column_names_.size(); input_idx++) {
-    size_t col_idx = static_cast<size_t>(column_name_map_[input_column_names_[input_idx]]);
+  for (size_t input_idx = 0; input_idx < pyfunc_column_names_.size(); input_idx++) {
+    size_t col_idx = static_cast<size_t>(column_name_map_[pyfunc_column_names_[input_idx]]);
     size_t row_id = 0;
     for (TensorRow &row : *(table_pair->first)) {
       row[col_idx] = std::move(output_table[input_idx][row_id++]);
@@ -290,8 +300,8 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
       py::object ret_py_obj = batch_map_func_(*input_args);
       // Parse batch map return value
       py::tuple ret_tuple = py::cast<py::tuple>(ret_py_obj);
-      if (ret_tuple.size() != input_column_names_.size() || !py::isinstance<py::tuple>(ret_tuple)) {
-        return Status(StatusCode::kPyFuncException, "Batch map function should return an tuple if size(input_columns)");
+      if (ret_tuple.size() != pyfunc_column_names_.size() || !py::isinstance<py::tuple>(ret_tuple)) {
+        return Status(StatusCode::kPyFuncException, "Batch map function should return a tuple");
       }
       for (size_t i = 0; i < ret_tuple.size(); i++) {
         TensorBatch output_batch;
@@ -311,5 +321,142 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
   }
   return Status(StatusCode::kOK);
 }
+
+Status BatchOp::PadTensor(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> *dst,
+                          const std::vector<dsize_t> &pad_shape, float pad_val) {
+  CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr && dst != nullptr, "tensor can't be nullptr");
+  if (src->Rank() == 0 || src->shape().AsVector() == pad_shape) {
+    (*dst) = src;  // if no padding, copy the pointer
+  } else {
+    CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed");
+    RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, TensorImpl::kFlexible, TensorShape(pad_shape), src->type()));
+    auto tensor_type = src->type().value();
+    if (pad_val == 0) {  // if pad with zero, don't care what type it is
+      RETURN_IF_NOT_OK((*dst)->Zero());
+    } else if (tensor_type == DataType::DE_INT8) {
+      RETURN_IF_NOT_OK((*dst)->Fill<int8_t>(pad_val));
+    } else if (tensor_type == DataType::DE_BOOL) {
+      RETURN_IF_NOT_OK((*dst)->Fill<bool>(pad_val));
+    } else if (tensor_type == DataType::DE_UINT8) {
+      RETURN_IF_NOT_OK((*dst)->Fill<uint8_t>(pad_val));
+    } else if (tensor_type == DataType::DE_INT16) {
+      RETURN_IF_NOT_OK((*dst)->Fill<int16_t>(pad_val));
+    } else if (tensor_type == DataType::DE_FLOAT16) {
+      RETURN_IF_NOT_OK((*dst)->Fill<float16>(static_cast<float16>(pad_val)));
+    } else if (tensor_type == DataType::DE_UINT16) {
+      RETURN_IF_NOT_OK((*dst)->Fill<uint16_t>(pad_val));
+    } else if (tensor_type == DataType::DE_INT32) {
+      RETURN_IF_NOT_OK((*dst)->Fill<int32_t>(pad_val));
+    } else if (tensor_type == DataType::DE_UINT32) {
+      RETURN_IF_NOT_OK((*dst)->Fill<uint32_t>(pad_val));
+    } else if (tensor_type == DataType::DE_INT64) {
+      RETURN_IF_NOT_OK((*dst)->Fill<int64_t>(pad_val));
+    } else if (tensor_type == DataType::DE_UINT64) {
+      RETURN_IF_NOT_OK((*dst)->Fill<uint64_t>(pad_val));
+    } else if (tensor_type == DataType::DE_FLOAT32) {
+      RETURN_IF_NOT_OK((*dst)->Fill<float>(pad_val));
+    } else if (tensor_type == DataType::DE_FLOAT64) {
+      RETURN_IF_NOT_OK((*dst)->Fill<double>(pad_val));
+    } else {
+      RETURN_STATUS_UNEXPECTED("Incorrect/Unknown tensor type");
+    }
+    std::vector<dsize_t> cur_ind(src->Rank(), 0), src_s(src->Rank(), 1), dst_s(src->Rank(), 1);
+    for (dsize_t i = src->Rank() - 2; i >= 0; i--) {
+      src_s[i] = src->shape()[i + 1] * src_s[i + 1];
+      dst_s[i] = pad_shape[i + 1] * dst_s[i + 1];
+    }
+    RETURN_IF_NOT_OK(PadHelper(src, *dst, cur_ind, src_s, dst_s, 0));
+  }
+  return Status::OK();
+}  // namespace dataset
+
+Status BatchOp::PadColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
+  RETURN_UNEXPECTED_IF_NULL(table_pair);  // placeholder for now, might need this in the future
+  CHECK_FAIL_RETURN_UNEXPECTED(table_pair->first->front().size() == column_name_map_.size(), "col_name_map mismatch");
+  std::vector<float> pad_vals(column_name_map_.size(), 0);  // value to pad each column's tensor with, default 0
+  std::set<int32_t> pad_cols;
+  // padded_shape provided by user, maximum shapes of current batch of tensors
+  std::vector<std::vector<dsize_t>> pad_shapes(column_name_map_.size()), max_shapes(column_name_map_.size());
+  RETURN_IF_NOT_OK(UnpackPadInfo(&pad_cols, &pad_vals, &pad_shapes));
+
+  // init each shape in max_shape to {-1,-1...} init each unspecified shape in pad_shape to -1 as well
+  for (size_t col_id : pad_cols) {
+    max_shapes[col_id] = std::vector<dsize_t>(table_pair->first->front()[col_id]->Rank(), -1);
+    if (pad_shapes[col_id].empty()) pad_shapes[col_id] = max_shapes[col_id];  // fill pad shape with -1
+    CHECK_FAIL_RETURN_UNEXPECTED(pad_shapes[col_id].size() == max_shapes[col_id].size(), "wrong rank in pad_shape");
+  }
+
+  // calculate maximum shape for each column that needs to be padded
+  for (const TensorRow &row : *(table_pair->first)) {  // iterator each row in a batch
+    for (size_t col_id : pad_cols) {                   // iterator each tensor in a row
+      CHECK_FAIL_RETURN_UNEXPECTED(row[col_id]->Rank() == max_shapes[col_id].size(),
+                                   "Tensor to be padded together need to have the same rank");
+      for (size_t dim = 0; dim < row[col_id]->Rank(); dim++) {  // pick the largest number in each dimension
+        max_shapes[col_id][dim] = std::max(max_shapes[col_id][dim], row[col_id]->shape()[dim]);
+      }
+    }
+  }
+
+  // if user sets a dimension to -1 (None in python), use the max value for current dimension
+  for (size_t col_id : pad_cols) {
+    for (size_t dim = 0; dim < pad_shapes[col_id].size(); dim++) {
+      if (pad_shapes[col_id][dim] < 0) pad_shapes[col_id][dim] = max_shapes[col_id][dim];
+    }
+  }
+
+  // call pad on each tensor that needs to be padded
+  for (TensorRow &row : *(table_pair->first)) {
+    for (size_t col_id : pad_cols) {
+      std::shared_ptr<Tensor> pad_tensor;
+      RETURN_IF_NOT_OK(PadTensor(row[col_id], &pad_tensor, pad_shapes[col_id], pad_vals[col_id]));
+      row[col_id] = pad_tensor;
+    }
+  }
+  return Status::OK();
+}
+
+Status BatchOp::UnpackPadInfo(std::set<int32_t> *pad_cols, std::vector<float> *pad_vals,
+                              std::vector<std::vector<dsize_t>> *pad_shapes) {
+  if (pad_info_.empty()) {  // if pad_info empty, pad every columns automatically
+    for (dsize_t col_id = 0; col_id < column_name_map_.size(); col_id++) {
+      pad_cols->insert(col_id);
+    }
+  } else {
+    for (auto p : pad_info_) {
+      CHECK_FAIL_RETURN_UNEXPECTED(column_name_map_.find(p.first) != column_name_map_.end(),
+                                   "no column exists with name:" + p.first);
+      dsize_t col_id = static_cast<dsize_t>(column_name_map_[p.first]);
+      CHECK_FAIL_RETURN_UNEXPECTED(col_id < pad_vals->size() && col_id < pad_shapes->size(), "col_id out of bound");
+      pad_cols->insert(col_id);
+      (*pad_vals)[col_id] = p.second.second;              // set pad values
+      (*pad_shapes)[col_id] = p.second.first.AsVector();  // empty vector if shape is unknown
+    }
+  }
+  return Status::OK();
+}
+
+Status BatchOp::PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> dst, std::vector<dsize_t> cur_ind,
+                          const std::vector<dsize_t> &src_s, const std::vector<dsize_t> &dst_s, size_t cur_dim) {
+  if (cur_dim == src->Rank() - 1) {  // if this is the last dimension, copy the data
+    uint8_t type_size = src->type().SizeInBytes();
+    size_t len = std::min(src->shape()[cur_dim], dst->shape()[cur_dim]) * type_size;
+    dsize_t src_flat_ind = 0, dst_flat_ind = 0;
+    for (size_t i = 0; i < src->Rank(); i++) {
+      src_flat_ind += src_s[i] * cur_ind[i];
+      dst_flat_ind += dst_s[i] * cur_ind[i];
+    }
+    unsigned char *src_addr = src->StartAddr() + src_flat_ind * type_size;
+    unsigned char *dst_addr = dst->StartAddr() + dst_flat_ind * type_size;
+    CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
+  } else {  // not the last dimension, keep doing recursion
+    dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]);
+    for (dsize_t i = 0; i < min_ind; i++) {
+      cur_ind[cur_dim] = i;
+      RETURN_IF_NOT_OK(PadHelper(src, dst, cur_ind, src_s, dst_s, cur_dim + 1));
+    }
+  }
+  return Status::OK();
+}
+
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
index 32d386e3c9..f17239e378 100644
--- a/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
+++ b/mindspore/ccsrc/dataset/engine/datasetops/batch_op.h
@@ -16,8 +16,11 @@
 #ifndef DATASET_ENGINE_DATASETOPS_BATCH_OP_H_
 #define DATASET_ENGINE_DATASETOPS_BATCH_OP_H_
 
+#include <algorithm>
+#include <map>
 #include <memory>
 #include <queue>
+#include <set>
 #include <string>
 #include <unordered_map>
 #include <utility>
@@ -44,10 +47,6 @@ class BatchOp : public ParallelOp {
     // @param int32_t batch_size
     explicit Builder(int32_t batch_size);
 
-    // Builder constructor for Batch, batch size function needs to be specified
-    // @param py::function batch_size_func
-    explicit Builder(py::function batch_size_func);
-
     // Default destructor
     ~Builder() = default;
 
@@ -67,6 +66,12 @@ class BatchOp : public ParallelOp {
       return *this;
     }
 
+    Builder &SetPaddingMap(const std::map<std::string, std::pair<TensorShape, float>> &pad_map, bool pad = true) {
+      builder_pad_ = pad;
+      builder_pad_map_ = pad_map;
+      return *this;
+    }
+
     // set connector size for batch
     // @param int32_t op_conn_size
     // @return Builder & reference to builder class object
@@ -109,11 +114,12 @@ class BatchOp : public ParallelOp {
     Status SanityCheck();
 
     bool builder_drop_;
+    bool builder_pad_;
     int32_t builder_batch_size_;
     int32_t builder_num_workers_;
     int32_t builder_op_connector_size_;
     std::vector<std::string> builder_cols_to_map_;
-
+    std::map<std::string, std::pair<TensorShape, float>> builder_pad_map_;
     py::function builder_batch_size_func_;
     py::function builder_batch_map_func_;
   };
@@ -143,8 +149,9 @@ class BatchOp : public ParallelOp {
   // @param int32_t op_queue_size
   // @param int32_t rows_per_buf
   // @param int32_t num_workers
-  BatchOp(int32_t batch_size, bool drop, int32_t op_queue_size, int32_t num_workers, const std::vector<std::string> &,
-          py::function batch_size_func, py::function batch_map_func);
+  BatchOp(int32_t batch_size, bool drop, bool pad, int32_t op_queue_size, int32_t num_workers,
+          const std::vector<std::string> &, py::function batch_size_func, py::function batch_map_func,
+          std::map<std::string, std::pair<TensorShape, float>> pad_map);
 
   // BatchOp destructor
   ~BatchOp() {}
@@ -176,7 +183,28 @@ class BatchOp : public ParallelOp {
   // @return Status - The error code return
   Status operator()() override;
 
+  // Pad input tensor according pad_shape, need to have same rank.
+  // @param std::shared_ptr<Tensor> src - tensor to pad from
+  // @param std::shared_ptr<Tensor> *dst - return tensor padded
+  // @param std::vector<dsize_t> pad_shape - shape to pad to
+  // @param float pad_val - value to pad with
+  // @return - The error code return
+  Status PadTensor(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> *dst, const std::vector<dsize_t> &pad_shape,
+                   float pad_val);
+
  private:
+  // recursive helper function. This function could be very expensive if called on a multi-dimensional tensor
+  // it is only meant to be called by PadTensor.
+  // @tparam T - type of tensor and fill value
+  // @param std::shared_ptr<Tensor> src - Tensor to pad from
+  // @param std::shared_ptr<Tensor>* dst - Tensor to pad to, return value
+  // @param std::vector<dsize_t> cur_ind - recursion helper
+  // @param T pad_val - value to pad tensor with
+  // @param size_t cur_dim - recursion helper
+  // @return Status - The error code return
+  Status PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> dst, std::vector<dsize_t> cur_ind,
+                   const std::vector<dsize_t> &src_s, const std::vector<dsize_t> &dst_s, size_t cur_dim = 0);
+
   // Worker thread for doing the memcpy of batch
   // @param int32_t param workerId
   // @return Status - The error code return
@@ -199,6 +227,16 @@ class BatchOp : public ParallelOp {
   // @return Status - The error code return
   Status MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair);
 
+  // @param std::set<int32_t> *cols, col ids to perform pad on
+  // @param std::vector<float> *vals, default padding value for each column
+  // @param std::vector<std::vector<dsize_t>> *shapes, padding shape specified by user
+  // @return Status - The error code return
+  Status UnpackPadInfo(std::set<int32_t> *cols, std::vector<float> *vals, std::vector<std::vector<dsize_t>> *shapes);
+
+  // @param table_pair
+  // @return Status - The error code return
+  Status PadColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair);
+
   // the number of thread pulling from the mOutConnector of the Op below
   // @return int32_t, 1
   int32_t num_consumers() const override { return 1; }
@@ -220,19 +258,15 @@ class BatchOp : public ParallelOp {
   Status InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info);
 
   int32_t start_batch_size_;
-  bool drop_;
-  // Name of the columns to perform map op on
-  std::vector<std::string> input_column_names_;
-  // Iterator for fetching
-  std::unique_ptr<ChildIterator> child_iterator_;
-  // Map of column_name: column_index
-  std::unordered_map<std::string, int32_t> column_name_map_;
-  // Internal queue for task distribution
-  QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_;
-  // Function pointer of batch size function
-  py::function batch_size_func_;
-  // Function pointer of per batch map function
-  py::function batch_map_func_;
+  bool drop_;                                                      // bool for whether to drop remainder or not
+  bool pad_;                                                       // bool for whether to perform padding on tensor
+  std::vector<std::string> pyfunc_column_names_;                   // Name of the columns to perform map op on
+  std::map<std::string, std::pair<TensorShape, float>> pad_info_;  // column names to perform padding on
+  std::unique_ptr<ChildIterator> child_iterator_;                  // child iterator for fetching TensorRows 1 by 1
+  std::unordered_map<std::string, int32_t> column_name_map_;       // Map of column_name: column_index
+  QueueList<std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>> worker_queues_;  // internal queue for syncing worker
+  py::function batch_size_func_;  // Function pointer of batch size function
+  py::function batch_map_func_;   // Function pointer of per batch map function
 };
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 06b740bb6b..73bd025e19 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -40,7 +40,8 @@ from mindspore._c_expression import typing
 from mindspore import log as logger
 from . import samplers
 from .iterators import DictIterator, TupleIterator
-from .validators import check, check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, check_rename, \
+from .validators import check, check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
+    check_rename, \
     check_take, check_project, check_imagefolderdatasetv2, check_mnist_cifar_dataset, check_manifestdataset, \
     check_tfrecorddataset, check_vocdataset, check_celebadataset, check_minddataset, check_generatordataset, \
     check_sync_wait, check_zip_dataset, check_add_column, check_textfiledataset
@@ -163,7 +164,7 @@ class Dataset:
 
     @check_batch
     def batch(self, batch_size, drop_remainder=False, num_parallel_workers=None, per_batch_map=None,
-              input_columns=None):
+              input_columns=None, pad_info=None):
         """
         Combines batch_size number of consecutive rows into batches.
 
@@ -181,7 +182,7 @@ class Dataset:
             drop_remainder (bool, optional): Determines whether or not to drop the last
                 possibly incomplete batch (default=False). If True, and if there are less
                 than batch_size rows available to make the last batch, then those rows will
-                be dropped and not propogated to the child node.
+                be dropped and not propagated to the child node.
             num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel (default=None).
             per_batch_map (callable, optional): Per batch map callable. A callable which takes
                 (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represent a batch of
@@ -189,6 +190,8 @@ class Dataset:
                 last parameter of the callable should always be a BatchInfo object.
             input_columns (list of string, optional): List of names of the input columns. The size of the list should
                 match with signature of per_batch_map callable.
+            pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
+                would pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
 
         Returns:
             BatchDataset, dataset batched.
@@ -200,7 +203,8 @@ class Dataset:
             >>> # and drops the last incomplete batch if there is one.
             >>> data = data.batch(100, True)
         """
-        return BatchDataset(self, batch_size, drop_remainder, num_parallel_workers, per_batch_map, input_columns)
+        return BatchDataset(self, batch_size, drop_remainder, num_parallel_workers, per_batch_map, input_columns,
+                            pad_info)
 
     @check_sync_wait
     def sync_wait(self, condition_name, num_batch=1, callback=None):
@@ -1026,13 +1030,26 @@ class BatchDataset(DatasetOp):
 
     Args:
         input_dataset (Dataset): Input Dataset to be batched.
-        batch_size (int): The size of the batch.
-        drop_remainder (bool, optional): Whether drop the remainder batch of data (drop_remainder=False).
-            If True, the last incomplete batch will be dropped.
+        batch_size (int or function): The number of rows each batch is created with. An
+            int or callable which takes exactly 1 parameter, BatchInfo.
+        drop_remainder (bool, optional): Determines whether or not to drop the last
+            possibly incomplete batch (default=False). If True, and if there are less
+            than batch_size rows available to make the last batch, then those rows will
+            be dropped and not propagated to the child node.
+        num_parallel_workers (int, optional): Number of workers to process the Dataset in parallel (default=None).
+        per_batch_map (callable, optional): Per batch map callable. A callable which takes
+            (list[Tensor], list[Tensor], ..., BatchInfo) as input parameters. Each list[Tensor] represent a batch of
+            Tensors on a given column. The number of lists should match with number of entries in input_columns. The
+            last parameter of the callable should always be a BatchInfo object.
+        input_columns (list of string, optional): List of names of the input columns. The size of the list should
+            match with signature of per_batch_map callable.
+        pad_info (dict, optional): Whether to perform padding on selected columns. pad_info={"col1":([224,224],0)}
+            would pad column with name "col1" to a tensor of size [224,224] and fill the missing with 0.
+
     """
 
     def __init__(self, input_dataset, batch_size, drop_remainder=False, num_parallel_workers=None,
-                 per_batch_map=None, input_columns=None):
+                 per_batch_map=None, input_columns=None, pad_info=None):
         super().__init__(num_parallel_workers)
 
         if BatchDataset._is_ancestor_of_repeat(input_dataset):
@@ -1044,6 +1061,7 @@ class BatchDataset(DatasetOp):
         self.drop_remainder = drop_remainder
         self.per_batch_map = per_batch_map
         self.input_columns = input_columns
+        self.pad_info = pad_info
         self.input.append(input_dataset)
         input_dataset.output.append(self)
         self._input_indexs = input_dataset.input_indexs
@@ -1054,6 +1072,7 @@ class BatchDataset(DatasetOp):
         args["drop_remainder"] = self.drop_remainder
         args["per_batch_map"] = self.per_batch_map
         args["input_columns"] = self.input_columns
+        args["pad_info"] = self.pad_info
         return args
 
     def get_dataset_size(self):
@@ -2702,6 +2721,7 @@ class TFRecordDataset(SourceDataset):
         >>> # 3) get all rows from dataset_files with schema file "./schema.json":
         >>> tfdataset = ds.TFRecordDataset(dataset_files=dataset_files, schema="./schema.json")
     """
+
     @check_tfrecorddataset
     def __init__(self, dataset_files, schema=None, columns_list=None, num_samples=None, num_parallel_workers=None,
                  shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=False):
@@ -3551,6 +3571,7 @@ class CelebADataset(SourceDataset):
         args["shard_id"] = self.shard_id
         return args
 
+
 class TextFileDataset(SourceDataset):
     """
     A source dataset that reads and parses datasets stored on disk in text format.
diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index 4f1bb2c2d7..fd6ecfffb0 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -324,6 +324,7 @@ def check_sampler_shuffle_shard_options(param_dict):
 
 def check_imagefolderdatasetv2(method):
     """A wrapper that wrap a parameter checker to the original Dataset(ImageFolderDatasetV2)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -356,6 +357,7 @@ def check_imagefolderdatasetv2(method):
 
 def check_mnist_cifar_dataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset, Cifar10/100Dataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -382,6 +384,7 @@ def check_mnist_cifar_dataset(method):
 
 def check_manifestdataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(ManifestDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -414,6 +417,7 @@ def check_manifestdataset(method):
 
 def check_tfrecorddataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(TFRecordDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -444,6 +448,7 @@ def check_tfrecorddataset(method):
 
 def check_vocdataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(VOCDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -470,6 +475,7 @@ def check_vocdataset(method):
 
 def check_celebadataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(CelebADataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -510,6 +516,7 @@ def check_celebadataset(method):
 
 def check_minddataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(MindDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -541,6 +548,7 @@ def check_minddataset(method):
 
 def check_generatordataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(GeneratorDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -628,8 +636,25 @@ def check_columns(columns, name):
         raise TypeError("{} should be either a list of strings or a single string.".format(name))
 
 
+def check_pad_info(key, val):
+    """check the key and value pair of pad_info in batch"""
+    check_type(key, "key in pad_info", str)
+    if val is not None:
+        assert len(val) == 2, "value of pad_info should be a tuple of size 2"
+        check_type(val, "value in pad_info", tuple)
+        if val[0] is not None:
+            check_type(val[0], "pad_shape", list)
+            for dim in val[0]:
+                if dim is not None:
+                    check_type(dim, "dim in pad_shape", int)
+                    assert dim > 0, "pad shape should be positive integers"
+        if val[1] is not None:
+            check_type(val[1], "pad_value", (int, float))
+
+
 def check_batch(method):
     """check the input arguments of batch."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -648,6 +673,14 @@ def check_batch(method):
 
         check_param_type(nreq_param_bool, param_dict, bool)
 
+        if (param_dict.get('pad_info') is not None) and (param_dict.get('per_batch_map') is not None):
+            raise ValueError("pad_info and per_batch_map can't both be set")
+
+        if param_dict.get('pad_info') is not None:
+            check_type(param_dict["pad_info"], "pad_info", dict)
+            for k, v in param_dict.get('pad_info').items():
+                check_pad_info(k, v)
+
         for param_name in nreq_param_columns:
             param = param_dict.get(param_name)
             if param is not None:
@@ -687,6 +720,7 @@ def check_sync_wait(method):
 
 def check_shuffle(method):
     """check the input arguments of shuffle."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -705,6 +739,7 @@ def check_shuffle(method):
 
 def check_map(method):
     """check the input arguments of map."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -729,6 +764,7 @@ def check_map(method):
 
 def check_filter(method):
     """"check the input arguments of filter."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -749,6 +785,7 @@ def check_filter(method):
 
 def check_repeat(method):
     """check the input arguments of repeat."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -764,6 +801,7 @@ def check_repeat(method):
 
 def check_skip(method):
     """check the input arguments of skip."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -780,6 +818,7 @@ def check_skip(method):
 
 def check_take(method):
     """check the input arguments of take."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -794,6 +833,7 @@ def check_take(method):
 
 def check_zip(method):
     """check the input arguments of zip."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -811,6 +851,7 @@ def check_zip(method):
 
 def check_zip_dataset(method):
     """check the input arguments of zip method in `Dataset`."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -830,6 +871,7 @@ def check_zip_dataset(method):
 
 def check_rename(method):
     """check the input arguments of rename."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -849,6 +891,7 @@ def check_rename(method):
 
 def check_project(method):
     """check the input arguments of project."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -876,6 +919,7 @@ def check_shape(shape, name):
 
 def check_add_column(method):
     """check the input arguments of add_column."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
@@ -905,6 +949,7 @@ def check_add_column(method):
 
 def check_textfiledataset(method):
     """A wrapper that wrap a parameter checker to the original Dataset(TextFileDataset)."""
+
     @wraps(method)
     def new_method(*args, **kwargs):
         param_dict = make_param_dict(method, args, kwargs)
diff --git a/tests/ut/cpp/dataset/batch_op_test.cc b/tests/ut/cpp/dataset/batch_op_test.cc
index 504cac51e5..866ebc9b19 100644
--- a/tests/ut/cpp/dataset/batch_op_test.cc
+++ b/tests/ut/cpp/dataset/batch_op_test.cc
@@ -30,16 +30,14 @@ namespace common = mindspore::common;
 namespace de = mindspore::dataset;
 
 using namespace mindspore::dataset;
-using mindspore::MsLogLevel::ERROR;
-using mindspore::ExceptionType::NoExceptionType;
 using mindspore::LogStream;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::MsLogLevel::ERROR;
 
 class MindDataTestBatchOp : public UT::DatasetOpTesting {
  protected:
-
 };
 
-
 std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false, int rows_per_buf = 2) {
   Status rc;
   std::shared_ptr<de::BatchOp> op;
@@ -93,10 +91,8 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
     rc = di.GetNextAsMap(&tensor_map);
     EXPECT_TRUE(rc.IsOk());
     std::shared_ptr<de::Tensor> t;
-    rc = de::Tensor::CreateTensor(&t,
-                                  TensorImpl::kFlexible, de::TensorShape({12, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) payload);
+    rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
     EXPECT_TRUE(rc.IsOk());
     // verify the actual data in Tensor is correct
     EXPECT_EQ(*t == *tensor_map["col_sint64"], true);
@@ -111,7 +107,6 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
   EXPECT_EQ(success, true);
 }
 
-
 TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
   std::string schema_file = datasets_root_path_ + "/testBatchDataset";
   bool success = false;
@@ -125,20 +120,14 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
                          -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
     de::DatasetIterator di(tree);
     std::shared_ptr<de::Tensor> t1, t2, t3;
-    rc = de::Tensor::CreateTensor(&t1,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) payload);
+    rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t2,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 7));
+    rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 7));
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t3,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 2));
+    rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 2));
     EXPECT_TRUE(rc.IsOk());
 
     TensorMap tensor_map;
@@ -163,7 +152,6 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
   EXPECT_EQ(success, true);
 }
 
-
 TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
   std::string schema_file = datasets_root_path_ + "/testBatchDataset";
   bool success = false;
@@ -177,25 +165,17 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
                          -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
     de::DatasetIterator di(tree);
     std::shared_ptr<de::Tensor> t1, t2, t3, t4;
-    rc = de::Tensor::CreateTensor(&t1,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) payload);
+    rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t2,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 7));
+    rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 7));
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t3,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 2));
+    rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 2));
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t4,
-                                  TensorImpl::kFlexible, de::TensorShape({3, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 9));
+    rc = de::Tensor::CreateTensor(&t4, TensorImpl::kFlexible, de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 9));
     EXPECT_TRUE(rc.IsOk());
 
     TensorMap tensor_map;
@@ -224,7 +204,6 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
   EXPECT_EQ(success, true);
 }
 
-
 TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
   std::string schema_file = datasets_root_path_ + "/testBatchDataset";
   bool success = false;
@@ -238,15 +217,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
                          -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
     de::DatasetIterator di(tree);
     std::shared_ptr<de::Tensor> t1, t2;
-    rc = de::Tensor::CreateTensor(&t1,
-                                  TensorImpl::kFlexible, de::TensorShape({7, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) payload);
+    rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t2,
-                                  TensorImpl::kFlexible, de::TensorShape({5, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 7));
+    rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 7));
     EXPECT_TRUE(rc.IsOk());
 
     TensorMap tensor_map;
@@ -275,7 +250,6 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
   EXPECT_EQ(success, true);
 }
 
-
 TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
   std::string schema_file = datasets_root_path_ + "/testBatchDataset";
   bool success = false;
@@ -289,15 +263,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
                          -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
     de::DatasetIterator di(tree);
     std::shared_ptr<de::Tensor> t1, t2;
-    rc = de::Tensor::CreateTensor(&t1,
-                                  TensorImpl::kFlexible, de::TensorShape({5, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) payload);
+    rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
     EXPECT_TRUE(rc.IsOk());
-    rc = de::Tensor::CreateTensor(&t2,
-                                  TensorImpl::kFlexible, de::TensorShape({5, 1}),
-                                  de::DataType(DataType::DE_INT64),
-                                  (unsigned char *) (payload + 5));
+    rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)(payload + 5));
     EXPECT_TRUE(rc.IsOk());
 
     TensorMap tensor_map;
@@ -325,3 +295,31 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
   }
   EXPECT_EQ(success, true);
 }
+
+TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
+  std::string schema_file = datasets_root_path_ + "/testBatchDataset";
+  std::shared_ptr<BatchOp> op;
+  std::map<std::string, std::pair<TensorShape, float>> m;
+  m.insert({"col_1d", std::make_pair(TensorShape({4}), -1)});
+  de::BatchOp::Builder(12).SetDrop(false).SetPaddingMap(m, true).Build(&op);
+  auto tree = Build({Storage(schema_file), op});
+  tree->Prepare();
+  Status rc = tree->Launch();
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";
+  } else {
+    int64_t payload[] = {-9223372036854775807 - 1,  1,  -1, -1, 2,  3,  -1, -1, 4,  5,  -1, -1, 6,  7,  -1, -1,
+                         8,  9,  -1, -1, 10, 11, -1, -1, 12, 13, -1, -1, 14, 15, -1, -1,
+                         16, 17, -1, -1, 18, 19, -1, -1, 20, 21, -1, -1, 22, 23, -1, -1};
+    std::shared_ptr<de::Tensor> t;
+    rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
+                                  (unsigned char *)payload);
+    de::DatasetIterator di(tree);
+    TensorMap tensor_map;
+    rc = di.GetNextAsMap(&tensor_map);
+    EXPECT_TRUE((*t) == (*(tensor_map["col_1d"])));
+    rc = di.GetNextAsMap(&tensor_map);
+    EXPECT_TRUE(tensor_map.size() == 0);
+    EXPECT_TRUE(rc.IsOk());
+  }
+}
diff --git a/tests/ut/python/dataset/test_pad_batch.py b/tests/ut/python/dataset/test_pad_batch.py
new file mode 100644
index 0000000000..7cfc34e718
--- /dev/null
+++ b/tests/ut/python/dataset/test_pad_batch.py
@@ -0,0 +1,213 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+import mindspore.dataset as ds
+import numpy as np
+import time
+
+
+# This UT test tests the following cases
+
+# 1. padding: input_shape=[x] output_shape=[y] where y > x
+# 2. padding in one dimension and truncate in the other. input_shape=[x1,x2] output_shape=[y1,y2] y1>x1 and y2<x2
+# 3. automatic padding for a specific column
+# 4. default setting for all columns
+# 5. test None in different places
+
+# this generator function yield two columns
+# col1d: [0],[1], [2], [3]
+# col2d: [[100],[200]], [[101],[201]], [102],[202]], [103],[203]]
+def gen_2cols(num):
+    for i in range(num):
+        yield (np.array([i]), np.array([[i + 100], [i + 200]]))
+
+
+# this generator function yield one column of variable shapes
+# col: [0], [0,1], [0,1,2], [0,1,2,3]
+def gen_var_col(num):
+    for i in range(num):
+        yield (np.array([j for j in range(i + 1)]),)
+
+
+# this generator function yield two columns of variable shapes
+# col1: [0], [0,1], [0,1,2], [0,1,2,3]
+# col2: [100], [100,101], [100,101,102], [100,110,102,103]
+def gen_var_cols(num):
+    for i in range(num):
+        yield (np.array([j for j in range(i + 1)]), np.array([100 + j for j in range(i + 1)]))
+
+
+# this generator function yield two columns of variable shapes
+# col1: [[0]], [[0,1]], [[0,1,2]], [[0,1,2,3]]
+# col2: [[100]], [[100,101]], [[100,101,102]], [[100,110,102,103]]
+def gen_var_cols_2d(num):
+    for i in range(num):
+        yield (np.array([[j for j in range(i + 1)]]), np.array([[100 + j for j in range(i + 1)]]))
+
+
+def test_batch_padding_01():
+    data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
+    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)})
+    data1 = data1.repeat(2)
+    for data in data1.create_dict_iterator():
+        assert np.array_equal([[0, -1], [1, -1]], data["col1d"])
+        assert np.array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"])
+
+
+def test_batch_padding_02():
+    data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"])
+    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)})
+    data1 = data1.repeat(2)
+    for data in data1.create_dict_iterator():
+        assert np.array_equal([[0], [1]], data["col1d"])
+        assert np.array_equal([[[100, -2]], [[101, -2]]], data["col2d"])
+
+
+def test_batch_padding_03():
+    data1 = ds.GeneratorDataset((lambda: gen_var_col(4)), ["col"])
+    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)})  # pad automatically
+    data1 = data1.repeat(2)
+    res = dict()
+    for ind, data in enumerate(data1.create_dict_iterator()):
+        res[ind] = data["col"].copy()
+    assert np.array_equal(res[0], [[0, -1], [0, 1]])
+    assert np.array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]])
+    assert np.array_equal(res[2], [[0, -1], [0, 1]])
+    assert np.array_equal(res[3], [[0, 1, 2, -1], [0, 1, 2, 3]])
+
+
+def test_batch_padding_04():
+    data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"])
+    data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={})  # pad automatically
+    data1 = data1.repeat(2)
+    for data in data1.create_dict_iterator():
+        assert np.array_equal(data["col1"], [[0, 0], [0, 1]])
+        assert np.array_equal(data["col2"], [[100, 0], [100, 101]])
+
+
+def test_batch_padding_05():
+    data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"])
+    data1 = data1.batch(batch_size=3, drop_remainder=False,
+                        pad_info={"col2": ([2, None], -2), "col1": (None, -1)})  # pad automatically
+    for data in data1.create_dict_iterator():
+        assert np.array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]])
+        assert np.array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]],
+                                             [[100, 101, 102], [-2, -2, -2]]])
+
+
+def batch_padding_performance_3d():
+    cifar10_dir = "../data/dataset/testCifar10Data"
+    data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
+    data1 = data1.repeat(24)
+    pad_info = {"image": ([36, 36, 3], 0)}
+    # pad_info = None
+    data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info)
+    start_time = time.time()
+    num_batches = 0
+    ret = []
+    for data in data1.create_dict_iterator():
+        num_batches += 1
+    res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
+    # print(res)
+
+
+def batch_padding_performance_1d():
+    cifar10_dir = "../data/dataset/testCifar10Data"
+    data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
+    data1 = data1.repeat(24)
+    data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
+    pad_info = {"image": ([3888], 0)}  # 3888 =36*36*3
+    # pad_info = None
+    data1 = data1.batch(batch_size=24, drop_remainder=True, pad_info=pad_info)
+    start_time = time.time()
+    num_batches = 0
+    for data in data1.create_dict_iterator():
+        num_batches += 1
+    res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
+    # print(res)
+
+
+def batch_pyfunc_padding_3d():
+    cifar10_dir = "../data/dataset/testCifar10Data"
+    data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
+    data1 = data1.repeat(24)
+    # pad_info = {"image": ([36, 36, 3], 0)}
+    data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, ((0, 4), (0, 4), (0, 0)))),
+                      python_multiprocessing=False)
+    data1 = data1.batch(batch_size=24, drop_remainder=True)
+    start_time = time.time()
+    num_batches = 0
+    for data in data1.create_dict_iterator():
+        num_batches += 1
+    res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
+    # print(res)
+
+
+def batch_pyfunc_padding_1d():
+    cifar10_dir = "../data/dataset/testCifar10Data"
+    data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False)  # shape = [32,32,3]
+    data1 = data1.repeat(24)
+    data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))
+    data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))), python_multiprocessing=False)
+    data1 = data1.batch(batch_size=24, drop_remainder=True)
+    start_time = time.time()
+    num_batches = 0
+    for data in data1.create_dict_iterator():
+        num_batches += 1
+    res = "total number of batch:" + str(num_batches) + " time elapsed:" + str(time.time() - start_time)
+    # print(res)
+
+
+# this function runs pad_batch and numpy.pad then compare the results
+def test_pad_via_map():
+    cifar10_dir = "../data/dataset/testCifar10Data"
+
+    def pad_map_config():
+        data1 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)  # shape = [32,32,3]
+        data1 = data1.map(input_columns="image", operations=(lambda x: x.reshape(-1)))  # reshape to 1d
+        data1 = data1.map(input_columns="image", operations=(lambda x: np.pad(x, (0, 816))))
+        data1 = data1.batch(batch_size=25, drop_remainder=True)
+        res = []
+        for data in data1.create_dict_iterator():
+            res.append(data["image"])
+        return res
+
+    def pad_batch_config():
+        data2 = ds.Cifar10Dataset(cifar10_dir, shuffle=False, num_samples=1000)  # shape = [32,32,3]
+        data2 = data2.map(input_columns="image", operations=(lambda x: x.reshape(-1)))  # reshape to 1d
+        data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)})
+        res = []
+        for data in data2.create_dict_iterator():
+            res.append(data["image"])
+        return res
+
+    res_from_map = pad_map_config()
+    res_from_batch = pad_batch_config()
+    assert len(res_from_batch) == len(res_from_batch)
+    for i in range(len(res_from_map)):
+        assert np.array_equal(res_from_map[i], res_from_batch[i])
+
+
+if __name__ == '__main__':
+    test_batch_padding_01()
+    test_batch_padding_02()
+    test_batch_padding_03()
+    test_batch_padding_04()
+    test_batch_padding_05()
+    # batch_padding_performance_3d()
+    # batch_padding_performance_1d()
+    # batch_pyfunc_padding_3d()
+    # batch_pyfunc_padding_1d()
+    test_pad_via_map()