diff --git a/mindspore/ccsrc/dataset/api/python_bindings.cc b/mindspore/ccsrc/dataset/api/python_bindings.cc
index 8a8e88be57..5391ad7cb3 100644
--- a/mindspore/ccsrc/dataset/api/python_bindings.cc
+++ b/mindspore/ccsrc/dataset/api/python_bindings.cc
@@ -40,6 +40,7 @@
 #include "dataset/kernels/image/resize_op.h"
 #include "dataset/kernels/image/uniform_aug_op.h"
 #include "dataset/kernels/image/bounding_box_augment_op.h"
+#include "dataset/kernels/data/duplicate_op.h"
 #include "dataset/kernels/data/fill_op.h"
 #include "dataset/kernels/data/mask_op.h"
 #include "dataset/kernels/data/pad_end_op.h"
@@ -443,6 +444,9 @@ void bindTensorOps2(py::module *m) {
                                                               "Tensor mask operation using relational comparator")
     .def(py::init<RelationalOp, std::shared_ptr<Tensor>, DataType>());
 
+  (void)py::class_<DuplicateOp, TensorOp, std::shared_ptr<DuplicateOp>>(*m, "DuplicateOp", "Duplicate tensor.")
+    .def(py::init<>());
+
   (void)py::class_<TruncateSequencePairOp, TensorOp, std::shared_ptr<TruncateSequencePairOp>>(
     *m, "TruncateSequencePairOp", "Tensor operation to truncate two tensors to a max_length")
     .def(py::init<int64_t>());
diff --git a/mindspore/ccsrc/dataset/core/tensor.h b/mindspore/ccsrc/dataset/core/tensor.h
index 0aec84f77b..a3dbb391e5 100644
--- a/mindspore/ccsrc/dataset/core/tensor.h
+++ b/mindspore/ccsrc/dataset/core/tensor.h
@@ -115,6 +115,16 @@ class Tensor {
   static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape,
                              DataType type, const unsigned char *data = nullptr);
 
+  /// Create a copy of the input tensor
+  /// \param out [out] output tensor to be generated
+  /// \param in [in] original tensor to be copied
+  /// \return Status
+  static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
+    const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+    *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
+    return Status::OK();
+  }
+
   // A static factory method to create a Tensor from a given py::array.
   // @param ptr output argument to hold the created Tensor
   // @param arr py::array
diff --git a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt b/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt
index 1df952f351..9131c9c667 100644
--- a/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt
+++ b/mindspore/ccsrc/dataset/kernels/data/CMakeLists.txt
@@ -10,4 +10,5 @@ add_library(kernels-data OBJECT
     slice_op.cc
     mask_op.cc
     concatenate_op.cc
+    duplicate_op.cc
 )
diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc
new file mode 100644
index 0000000000..959516a4aa
--- /dev/null
+++ b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.cc
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dataset/kernels/data/duplicate_op.h"
+
+#include "dataset/core/tensor.h"
+#include "dataset/kernels/tensor_op.h"
+
+namespace mindspore {
+namespace dataset {
+
+Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) {
+  IO_CHECK_VECTOR(input, output);
+  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
+  std::shared_ptr<Tensor> out;
+  RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, input[0]));
+  output->push_back(input[0]);
+  output->push_back(out);
+  return Status::OK();
+}
+}  // namespace dataset
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h
new file mode 100644
index 0000000000..4c9d6d36c9
--- /dev/null
+++ b/mindspore/ccsrc/dataset/kernels/data/duplicate_op.h
@@ -0,0 +1,42 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DATASET_KERNELS_DATA_DUPLICATE_OP_H_
+#define DATASET_KERNELS_DATA_DUPLICATE_OP_H_
+
+#include <memory>
+#include <vector>
+
+#include "dataset/core/tensor.h"
+#include "dataset/kernels/tensor_op.h"
+
+namespace mindspore {
+namespace dataset {
+
+class DuplicateOp : public TensorOp {
+ public:
+  DuplicateOp() = default;
+
+  ~DuplicateOp() override = default;
+
+  void Print(std::ostream &out) const override { out << "DuplicateOp"; }
+
+  Status Compute(const TensorRow &input, TensorRow *output) override;
+
+  uint32_t NumOutput() override { return 2; }
+};
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // DATASET_KERNELS_DATA_DUPLICATE_OP_H_
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 4482f029cd..70e9b763f6 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -4869,10 +4869,10 @@ class BuildVocabDataset(DatasetOp):
             top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are
                 taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken (default=None,
                 all words are included).
-            special_tokens(list): a list of strings, each one is a special token. for e.g. ["<pad>","<unk>"]
-                (default=None, no special tokens will be added).
-            special_first(bool): whether special_tokens will be prepended/appended to vocab, If special_tokens is
-                specified and special_first is set to None, special_tokens will be prepended. (default=None).
+            special_tokens(list, optional): a list of strings, each one is a special token, for example
+                special_tokens=["<pad>","<unk>"] (default=None, no special tokens will be added).
+            special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If
+                special_tokens is specified and special_first is set to None, special_tokens will be prepended
+                (default=None).
             prefetch_size (int, optional): prefetch number of records ahead of the user's request (default=None).
 
""" diff --git a/mindspore/dataset/text/utils.py b/mindspore/dataset/text/utils.py index c09b6d71d7..483e91bead 100644 --- a/mindspore/dataset/text/utils.py +++ b/mindspore/dataset/text/utils.py @@ -50,8 +50,8 @@ class Vocab(cde.Vocab): top_k(int, optional): top_k > 0. Number of words to be built into vocab. top_k most frequent words are taken. top_k is taken after freq_range. If not enough top_k, all words will be taken. (default=None all words are included). - special_tokens(list): a list of strings, each one is a special token. for e.g. ["",""] - (default=None, no special tokens will be added). + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab. If special_tokens is specified and special_first is set to None, special_tokens will be prepended. (default=None). return: @@ -72,8 +72,8 @@ class Vocab(cde.Vocab): build a vocab object from a list of word. Args: word_list(list): a list of string where each element is a word of type string. - special_tokens(list): a list of strings, each one is a special token. for e.g. ["",""] - (default=None, no special tokens will be added). + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens is specified and special_first is set to None, special_tokens will be prepended. (default=None). """ @@ -89,8 +89,8 @@ class Vocab(cde.Vocab): delimiter(str, optional): a delimiter to break up each line in file, the first element is taken to be the word (default=None). vocab_size(int, optional): number of words to read from file_path (default=None, all words are taken). - special_tokens(list): a list of strings, each one is a special token. for e.g. ["",""] - (default=None, no special tokens will be added). + special_tokens(list, optional): a list of strings, each one is a special token. for example + special_tokens=["",""] (default=None, no special tokens will be added). special_first(bool, optional): whether special_tokens will be prepended/appended to vocab, If special_tokens is specified and special_first is set to None, special_tokens will be prepended. (default=None). """ diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py index 903315ef0b..e69f9371c9 100644 --- a/mindspore/dataset/transforms/c_transforms.py +++ b/mindspore/dataset/transforms/c_transforms.py @@ -203,3 +203,22 @@ class Concatenate(cde.ConcatenateOp): def __init__(self, axis=0, prepend=None, append=None): # add some validations here later super().__init__(axis, prepend, append) + + +class Duplicate(cde.DuplicateOp): + """ + Duplicate the input tensor to a new output tensor. The input tensor is carried over to the output list. 
diff --git a/mindspore/dataset/transforms/c_transforms.py b/mindspore/dataset/transforms/c_transforms.py
index 903315ef0b..e69f9371c9 100644
--- a/mindspore/dataset/transforms/c_transforms.py
+++ b/mindspore/dataset/transforms/c_transforms.py
@@ -203,3 +203,22 @@ class Concatenate(cde.ConcatenateOp):
     def __init__(self, axis=0, prepend=None, append=None):
         # add some validations here later
         super().__init__(axis, prepend, append)
+
+
+class Duplicate(cde.DuplicateOp):
+    """
+    Duplicate the input tensor to a new output tensor. The input tensor is carried over to the output list.
+    Examples:
+        >>> # Data before
+        >>> # |  x      |
+        >>> # +---------+
+        >>> # | [1,2,3] |
+        >>> # +---------+
+        >>> data = data.map(input_columns=["x"], operations=Duplicate(),
+        >>>                 output_columns=["x", "y"], columns_order=["x", "y"])
+        >>> # Data after
+        >>> # |  x      |  y      |
+        >>> # +---------+---------+
+        >>> # | [1,2,3] | [1,2,3] |
+        >>> # +---------+---------+
+    """
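
The pattern this op enables is "keep the raw column, transform only the copy". A hedged sketch of that pattern (it mirrors the NumpySlicesDataset usage in the Python test below, and assumes Fill is available in this build's c_transforms):

    import mindspore.dataset as ds
    import mindspore.dataset.transforms.c_transforms as ops

    # Duplicate column "x" into "y", then overwrite only the copy.
    data = ds.NumpySlicesDataset([[1, 2, 3]], column_names="x")
    data = data.map(input_columns=["x"], output_columns=["x", "y"],
                    columns_order=["x", "y"], operations=ops.Duplicate())
    data = data.map(input_columns=["y"], operations=ops.Fill(0))  # Fill assumed available
    for d in data.create_dict_iterator():
        print(d["x"], d["y"])  # "x" stays raw, "y" is all zeros
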
diff --git a/tests/ut/cpp/dataset/duplicate_op_test.cc b/tests/ut/cpp/dataset/duplicate_op_test.cc
new file mode 100644
index 0000000000..6c9c00a30e
--- /dev/null
+++ b/tests/ut/cpp/dataset/duplicate_op_test.cc
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "dataset/core/client.h"
+#include "common/common.h"
+#include "gtest/gtest.h"
+#include "dataset/core/tensor.h"
+#include "dataset/util/de_error.h"
+#include "dataset/kernels/data/duplicate_op.h"
+
+using namespace mindspore::dataset;
+
+namespace py = pybind11;
+
+class MindDataTestDuplicateOp : public UT::Common {
+ public:
+  MindDataTestDuplicateOp() {}
+
+  void SetUp() { GlobalInit(); }
+};
+
+TEST_F(MindDataTestDuplicateOp, Basics) {
+  std::shared_ptr<Tensor> t;
+  Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6}));
+  std::shared_ptr<Tensor> v;
+  Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar());
+  std::shared_ptr<TensorOp> op = std::make_shared<DuplicateOp>();
+  TensorRow in;
+  in.push_back(t);
+  TensorRow out;
+  ASSERT_TRUE(op->Compute(in, &out).IsOk());
+
+  ASSERT_TRUE(*t == *out[0]);
+  ASSERT_TRUE(*t == *out[1]);
+  ASSERT_TRUE(t->GetBuffer() == out[0]->GetBuffer());
+  ASSERT_TRUE(t->GetBuffer() != out[1]->GetBuffer());
+}
diff --git a/tests/ut/python/dataset/test_duplicate_op.py b/tests/ut/python/dataset/test_duplicate_op.py
new file mode 100644
index 0000000000..9de3453a7e
--- /dev/null
+++ b/tests/ut/python/dataset/test_duplicate_op.py
@@ -0,0 +1,40 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""
+Testing Duplicate op in DE
+"""
+import numpy as np
+
+import mindspore.dataset as ds
+import mindspore.dataset.transforms.c_transforms as ops
+
+
+def compare(array):
+    data = ds.NumpySlicesDataset([array], column_names="x")
+    array = np.array(array)
+    data = data.map(input_columns=["x"], output_columns=["x", "y"], columns_order=["x", "y"],
+                    operations=ops.Duplicate())
+    for d in data.create_dict_iterator():
+        np.testing.assert_array_equal(array, d["x"])
+        np.testing.assert_array_equal(array, d["y"])
+
+
+def test_duplicate_basics():
+    compare([1, 2, 3])
+    compare([b"1", b"2", b"3"])
+
+
+if __name__ == "__main__":
+    test_duplicate_basics()
diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py
index 425a79e069..35411e5c80 100644
--- a/tests/ut/python/dataset/test_vocab.py
+++ b/tests/ut/python/dataset/test_vocab.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Huawei Technologies Co., Ltd
+# Copyright 2020 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -94,9 +94,10 @@ def test_from_file():
         for word in texts.split(" "):
             yield (np.array(word, dtype='S'),)
 
-    def test_config(lookup_str, special_tokens, special_first):
+    def test_config(lookup_str, vocab_size, special_tokens, special_first):
         try:
-            vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, special_tokens=special_tokens, special_first=special_first)
+            vocab = text.Vocab.from_file(SIMPLE_VOCAB_FILE, vocab_size=vocab_size, special_tokens=special_tokens,
+                                         special_first=special_first)
             data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"])
             data = data.map(input_columns=["text"], operations=text.Lookup(vocab))
             res = []
@@ -106,9 +107,14 @@ def test_from_file():
         except ValueError as e:
             return str(e)
 
-    assert test_config("w1 w2 w3", ["s1", "s2", "s3"], True) == [3, 4, 5]
-    assert test_config("w1 w2 w3", ["s1", "s2", "s3"], False) == [0, 1, 2]
-    assert "special_tokens contains duplicate" in test_config("w1", ["s1", "s1"], True)
+    # test special tokens are prepended
+    assert test_config("w1 w2 w3 s1 s2 s3", None, ["s1", "s2", "s3"], True) == [3, 4, 5, 0, 1, 2]
+    # test special tokens are appended
+    assert test_config("w1 w2 w3 s1 s2 s3", None, ["s1", "s2", "s3"], False) == [0, 1, 2, 8, 9, 10]
+    # test special tokens are appended when not all words in the file are used
+    assert test_config("w1 w2 w3 s1 s2 s3", 3, ["s1", "s2", "s3"], False) == [0, 1, 2, 3, 4, 5]
+    # test exception when special_tokens contains duplicate words
+    assert "special_tokens contains duplicate" in test_config("w1", None, ["s1", "s1"], True)
 
 
 if __name__ == '__main__':
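
The new from_file assertions encode a simple id-assignment rule: words read from the file get consecutive ids (shifted past any prepended specials), and special tokens occupy either the first or the last ids depending on special_first. The helper below is hypothetical (not part of the patch) and merely reproduces the expected lists above; it implies SIMPLE_VOCAB_FILE holds 8 words:

    def expected_ids(num_words, num_specials, special_first):
        """Sketch of the id layout implied by the assertions above."""
        if special_first:
            words = list(range(num_specials, num_specials + num_words))
            specials = list(range(num_specials))
        else:
            words = list(range(num_words))
            specials = list(range(num_words, num_words + num_specials))
        return words, specials

    print(expected_ids(8, 3, True))   # specials 0..2, words start at 3 -> [3, 4, 5] + [0, 1, 2]
    print(expected_ids(8, 3, False))  # words 0..7, specials 8..10 -> [0, 1, 2] + [8, 9, 10]
    print(expected_ids(3, 3, False))  # vocab_size=3: words 0..2, specials 3..5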