From db6aa862d5aecff3c9f68fd51a365ddbb57bb59c Mon Sep 17 00:00:00 2001
From: He Wei
Date: Tue, 28 Jul 2020 20:41:27 +0800
Subject: [PATCH] Optimize tensor data

Replace std::vector with std::unique_ptr for tensor data storage;
this prevents unintended data initialization when data is lazily allocated.
---
 mindspore/core/ir/tensor.cc | 92 ++++++++++++++++++++++++-------------
 mindspore/core/ir/tensor.h  |  2 +-
 2 files changed, 61 insertions(+), 33 deletions(-)

diff --git a/mindspore/core/ir/tensor.cc b/mindspore/core/ir/tensor.cc
index c7f3b8fce8..7d1403b48b 100644
--- a/mindspore/core/ir/tensor.cc
+++ b/mindspore/core/ir/tensor.cc
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -53,54 +54,80 @@ static size_t SizeOf(const std::vector<int> &shape) {
   return std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies<size_t>());
 }
 
+template <typename T, typename U>
+std::unique_ptr<T[]> NewData(const U *input, size_t size) {
+  if (input == nullptr || size == 0) {
+    return nullptr;
+  }
+  auto data = std::make_unique<T[]>(size);
+  if constexpr (!std::is_same<T, U>::value && (std::is_same<T, float16>::value || std::is_same<U, float16>::value)) {
+    // Because float16 does not support implicit casts from/to other types,
+    // we cannot use std::copy() on an array of float16, so use a loop here.
+    for (size_t i = 0; i < size; ++i) {
+      data[i] = static_cast<T>(input[i]);
+    }
+  } else {
+    // Otherwise, use std::copy for better performance.
+    std::copy(input, input + size, data.get());
+  }
+  return data;
+}
+
+template <typename T, typename Scalar>
+std::unique_ptr<T[]> NewData(Scalar scalar) {
+  auto data = std::make_unique<T[]>(1);
+  data[0] = static_cast<T>(scalar);
+  return data;
+}
+
 template <typename T>
-std::vector<T> CopyData(const std::vector<int> &shape, void *data, TypeId data_type) {
-  const size_t count = SizeOf(shape);
+std::unique_ptr<T[]> CopyData(const std::vector<int> &shape, void *data, TypeId data_type) {
+  const size_t size = SizeOf(shape);
   switch (data_type) {
     case kNumberTypeBool:
     case kNumberTypeUInt8: {
       auto buf = static_cast<uint8_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeInt8: {
       auto buf = static_cast<int8_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeInt16: {
       auto buf = static_cast<int16_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeInt32: {
       auto buf = static_cast<int32_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeInt64: {
       auto buf = static_cast<int64_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeUInt16: {
       auto buf = static_cast<uint16_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeUInt32: {
      auto buf = static_cast<uint32_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeUInt64: {
       auto buf = static_cast<uint64_t *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeFloat16: {
       auto buf = static_cast<float16 *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeFloat32: {
-      const float *buf = static_cast<float *>(data);
-      return std::vector<T>(buf, buf + count);
+      auto buf = static_cast<float *>(data);
+      return NewData<T>(buf, size);
     }
     case kNumberTypeFloat64: {
       auto buf = static_cast<double *>(data);
-      return std::vector<T>(buf, buf + count);
+      return NewData<T>(buf, size);
     }
     default:
       break;
   }
@@ -109,14 +136,14 @@ std::vector<T> CopyData(const std::vector<int> &shape, void *data, TypeId data_t
 }
 
 template <typename T>
-std::vector<T> CopyData(const std::vector<int> &shape, void *data, size_t data_len) {
+std::unique_ptr<T[]> CopyData(const std::vector<int> &shape, void *data, size_t data_len) {
   size_t size = SizeOf(shape);
   if (size * sizeof(T) != data_len) {
     MS_LOG(EXCEPTION) << "Incorrect tensor input data length " << data_len << ", expect " << size * sizeof(T)
                       << " item size " << sizeof(T);
   }
   auto buf = static_cast<T *>(data);
-  return {buf, buf + size};
+  return NewData<T>(buf, size);
 }
 
 // Tensor data implementation.
@@ -132,13 +159,13 @@ class TensorDataImpl : public TensorData {
   TensorDataImpl(const std::vector<int> &shape, void *data, TypeId data_type)
       : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(CopyData<T>(shape, data, data_type)) {}
 
-  template <typename InputIt>
-  TensorDataImpl(const std::vector<int> &shape, InputIt first, InputIt last)
-      : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(first, last) {}
+  template <typename U>
+  TensorDataImpl(const std::vector<int> &shape, const U *input, size_t size)
+      : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(NewData<T>(input, size)) {}
 
   template <typename Scalar>
   TensorDataImpl(const std::vector<int> &shape, Scalar scalar)
-      : ndim_(shape.size()), data_size_(SizeOf(shape)), data_({static_cast<T>(scalar)}) {}
+      : ndim_(shape.size()), data_size_(SizeOf(shape)), data_(NewData<T>(scalar)) {}
 
   ssize_t size() const override { return static_cast<ssize_t>(data_size_); }
 
@@ -149,24 +176,25 @@ class TensorDataImpl : public TensorData {
   ssize_t ndim() const override { return static_cast<ssize_t>(ndim_); }
 
   void *data() override {
-    static std::vector<T> empty_data(1);
+    static T empty_data = static_cast<T>(0);
     if (data_size_ == 0) {
       // Prevent null pointer for empty shape.
-      return empty_data.data();
+      return &empty_data;
     }
     // Lazy allocation.
-    if (data_.empty()) {
-      data_.resize(data_size_);
+    if (data_ == nullptr) {
+      data_ = std::make_unique<T[]>(data_size_);
     }
-    return data_.data();
+    return data_.get();
   }
 
   bool equals(const TensorData &other) const override {
     auto ptr = dynamic_cast<const TensorDataImpl<T> *>(&other);
-    if (ptr) {
-      return (ptr == this) || ((ndim_ == ptr->ndim_) && (data_size_ == ptr->data_size_) && (data_ == ptr->data_));
+    if (ptr == nullptr) {
+      return false;
     }
-    return false;
+    return (ptr == this) || ((ndim_ == ptr->ndim_) && (data_size_ == ptr->data_size_) &&
+                             (std::equal(data_.get(), data_.get() + data_size_, ptr->data_.get())));
   }
 
   std::string ToString(const TypeId type, const std::vector<int> &shape) const override {
@@ -179,7 +207,7 @@ class TensorDataImpl : public TensorData {
     if (data_size_ == 0) {
       return "";
     }
-    if (data_.empty()) {
+    if (data_ == nullptr) {
       return "";
     }
 
@@ -309,7 +337,7 @@ class TensorDataImpl : public TensorData {
 
   size_t ndim_{0};
   size_t data_size_{0};
-  std::vector<T> data_;
+  std::unique_ptr<T[]> data_;
 };
 
 template <typename T>
@@ -374,12 +402,12 @@ Tensor::Tensor(TypeId data_type, const std::vector<int> &shape, void *data, Type
 
 Tensor::Tensor(const std::vector<int64_t> &input, const TypePtr &data_type)
     : MetaTensor(TypeIdOf(data_type, kNumberTypeInt32), {static_cast<int>(input.size())}),
-      data_(MakeTensorData(data_type_, shape_, input.begin(), input.end())),
+      data_(MakeTensorData(data_type_, shape_, input.data(), input.size())),
       id_(MakeId()) {}
 
 Tensor::Tensor(const std::vector<double> &input, const TypePtr &data_type)
     : MetaTensor(TypeIdOf(data_type, kNumberTypeFloat32), {static_cast<int>(input.size())}),
-      data_(MakeTensorData(data_type_, shape_, input.begin(), input.end())),
+      data_(MakeTensorData(data_type_, shape_, input.data(), input.size())),
       id_(MakeId()) {}
 
 Tensor::Tensor(int64_t input, const TypePtr &data_type)
diff --git a/mindspore/core/ir/tensor.h b/mindspore/core/ir/tensor.h
index 2f844b7171..c61add5a23 100644
--- a/mindspore/core/ir/tensor.h
+++ b/mindspore/core/ir/tensor.h
@@ -82,7 +82,7 @@ class Tensor : public MetaTensor {
   // param data The shared tensor data.
   Tensor(TypeId data_type, const std::vector<int> &shape, TensorDataPtr data);
 
-  // brief Create an all zero tensor.
+  // brief Create a lazily allocated tensor.
   //
   // param data_type [TypeId] Data type of the tensor.
   // param shape The shape represented by std::vector of the tensor.
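
For reference, the standalone sketch below mirrors the NewData copy helper added in tensor.cc above; it is an illustration rather than part of the patch. Float16 here is a simplified stand-in for MindSpore's float16 type, which does not support implicit conversion to or from other arithmetic types, so the element-wise cast branch is needed whenever it is involved.

```cpp
// Standalone sketch of the NewData pattern; Float16 is a stand-in type.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <type_traits>

// Minimal float16-like type: no implicit conversions, explicit casts only.
struct Float16 {
  Float16() = default;
  explicit Float16(float v) : value_(v) {}
  explicit operator float() const { return value_; }

 private:
  float value_ = 0.0f;
};

template <typename T, typename U>
std::unique_ptr<T[]> NewData(const U *input, size_t size) {
  if (input == nullptr || size == 0) {
    return nullptr;
  }
  auto data = std::make_unique<T[]>(size);
  if constexpr (!std::is_same<T, U>::value &&
                (std::is_same<T, Float16>::value || std::is_same<U, Float16>::value)) {
    // Float16 has no implicit conversions, so copy element by element with a cast.
    for (size_t i = 0; i < size; ++i) {
      data[i] = static_cast<T>(input[i]);
    }
  } else {
    // Same or implicitly convertible element types: std::copy is enough.
    std::copy(input, input + size, data.get());
  }
  return data;
}

int main() {
  const float src[] = {1.0f, 2.0f, 3.0f};
  auto as_double = NewData<double>(src, 3);  // takes the std::copy branch
  auto as_half = NewData<Float16>(src, 3);   // takes the element-wise cast branch
  return (as_double && as_half) ? 0 : 1;
}
```

The `if constexpr` dispatch matters because the discarded branch is never instantiated; a plain `std::copy` over Float16 elements would not compile, which is exactly why the patch special-cases float16.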