!14639 Remove DataBuffer class

From: @hfarahat
Reviewed-by: @pandoublefeng,@robingrosman
Signed-off-by: @pandoublefeng
pull/14639/MERGE
mindspore-ci-bot 4 years ago committed by Gitee
commit 3d85930cfe

@ -13,7 +13,6 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
set(SRC_FILES_LIST
execution_tree.cc
data_buffer.cc
data_schema.cc
dataset_iterator.cc
tree_adapter.cc

@ -34,7 +34,7 @@
#else
#include "minddata/dataset/engine/cache/stub/cache_grpc_client.h"
#endif
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/util/lock.h"
#include "minddata/dataset/util/cond_var.h"
#include "minddata/dataset/util/queue_map.h"

@ -22,7 +22,7 @@
#include <iomanip>
#include <sstream>
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/util/random.h"
#include "minddata/dataset/util/services.h"

@ -115,7 +115,6 @@ Status IteratorConsumer::GetNextAsOrderedPair(std::vector<std::pair<std::string,
Status ToDevice::Init(std::shared_ptr<DatasetNode> d) { return tree_adapter_->Compile(std::move(d), num_epochs_); }
Status ToDevice::Send() {
std::unique_ptr<DataBuffer> db;
RETURN_IF_NOT_OK(tree_adapter_->Launch());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");

@ -1,89 +0,0 @@
/**
* Copyright 2019 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/core/tensor.h"
namespace mindspore {
namespace dataset {
// Name: Constructor #1
// Description: This is the main constructor that is used for making a buffer
DataBuffer::DataBuffer(int32_t id, BufferFlags flags) : buffer_id_(id), tensor_table_(nullptr), buffer_flags_(flags) {}
// A method for debug printing of the buffer
void DataBuffer::Print(std::ostream &out, bool show_all) const {
out << "bufferId: " << buffer_id_ << "\nflags: " << std::hex << buffer_flags_ << std::dec << "\n";
// If the column counts are set then it means that data has been set into
// the tensor table. Display the tensor table here.
if (this->NumCols() > 0) {
out << "Tensor table:\n";
for (int32_t row = 0; row < DataBuffer::NumRows(); ++row) {
out << "Row # : " << row << "\n";
TensorRow currRow = (*tensor_table_)[row];
for (int32_t col = 0; col < this->NumCols(); ++col) {
out << "Column #: " << col << "\n"; // Should add the column name here as well?
// Call the tensor display
out << *(currRow[col]) << "\n";
}
}
}
}
// Remove me!! Callers should fetch rows via pop
// Looks up a single tensor by (row, column) position without removing anything from the table.
// \param[out] ptr Receives the tensor stored at the requested position.
// \param[in] row_id Zero-based row index into the tensor table.
// \param[in] col_id Zero-based column index within that row.
// \return Status::OK() on success; an unexpected-error Status when no tensor table is attached
//     or when either index is out of range.
Status DataBuffer::GetTensor(std::shared_ptr<Tensor> *ptr, int32_t row_id, int32_t col_id) const {
  // The constructor leaves tensor_table_ null, so it must be checked before any dereference.
  // Negative indices are rejected explicitly instead of relying on signed-to-unsigned wraparound.
  const bool row_in_range =
    tensor_table_ != nullptr && row_id >= 0 && static_cast<size_t>(row_id) < tensor_table_->size();
  // Short-circuit evaluation guarantees the row is only touched once row_in_range holds.
  if (!row_in_range || col_id < 0 || static_cast<size_t>(col_id) >= tensor_table_->at(row_id).size()) {
    std::string err_msg =
      "indices for mTensorTable out of range: (" + std::to_string(row_id) + "," + std::to_string(col_id) + ").";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  *ptr = (tensor_table_->at(row_id)).at(col_id);
  return Status::OK();
}
// Remove me!! Callers should fetch rows via pop
// Copies the row stored at position row_id into *ptr; the table itself is left unchanged.
// Returns an unexpected-error Status when no table is attached, the table is empty, or row_id
// is not below the number of rows.
Status DataBuffer::GetRow(int32_t row_id, TensorRow *ptr) const {
  const bool table_missing = !tensor_table_ || tensor_table_->empty();
  if (table_missing || row_id >= tensor_table_->size()) {
    std::string err_msg = "rowId for mTensorTable out of range: " + std::to_string(row_id);
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  *ptr = tensor_table_->at(row_id);
  return Status::OK();
}
// Moves the first row of the tensor table into *ptr and removes it from the table.
// When no table is attached or the table is empty, *ptr is left untouched and the call still
// reports Status::OK().
Status DataBuffer::PopRow(TensorRow *ptr) {
  const bool has_row = (tensor_table_ != nullptr) && !tensor_table_->empty();
  if (!has_row) {
    return Status::OK();
  }
  *ptr = std::move(tensor_table_->front());
  tensor_table_->pop_front();
  return Status::OK();
}
// Drops the last number_of_rows rows from the tensor table.
// \param[in] number_of_rows How many trailing rows to remove; zero or a negative value is a no-op.
// \return Status::OK() on success; an unexpected-error Status (table left unmodified) when no
//     table is attached or more rows are requested than the table holds.
Status DataBuffer::SliceOff(int64_t number_of_rows) {
  if (number_of_rows <= 0) {
    return Status::OK();
  }
  // Popping from a missing or exhausted table is undefined behaviour, so validate up front.
  if (tensor_table_ == nullptr || static_cast<uint64_t>(number_of_rows) > tensor_table_->size()) {
    std::string err_msg = "Cannot slice off " + std::to_string(number_of_rows) + " rows from a buffer holding " +
                          std::to_string(NumRows()) + " rows.";
    RETURN_STATUS_UNEXPECTED(err_msg);
  }
  for (int64_t removed = 0; removed < number_of_rows; ++removed) {
    tensor_table_->pop_back();
  }
  return Status::OK();
}
} // namespace dataset
} // namespace mindspore

@ -1,114 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATA_BUFFER_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATA_BUFFER_H_
#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "minddata/dataset/util/allocator.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_row.h"
namespace mindspore {
namespace dataset {
/// \brief Container of tensor data that acts as the unit of transmission between the connectors of
///     dataset operators. Tensors are kept in a table-like layout: n TensorRows, each of which may
///     consist of m tensors (columns).
class DataBuffer {
 public:
  /// \brief Bit flags describing what kind of message the buffer carries
  enum BufferFlags : uint32_t {
    kDeBFlagNone = 0,
    kDeBFlagEOF = 1,         // The buffer is an eof end-of-data msg
    kDeBFlagEOE = 1u << 1,   // The buffer is an eoe end-of-epoch msg
    kDeBFlagWait = 1u << 2,  // The buffer is a control signal for workers to suspend operations
    kDeBFlagQuit = 1u << 3   // The buffer is a control signal for workers to quit
  };

  /// \brief Main constructor used for making a buffer
  /// \param[in] id The id to assign to the buffer
  /// \param[in] flags The initial flag bits of the buffer
  DataBuffer(int32_t id, BufferFlags flags);

  /// \brief Default destructor
  ~DataBuffer() = default;

  /// \brief Debug printing of the buffer
  /// \param[in, out] out The stream to write to
  /// \param[in] show_all A boolean to toggle between details and summary printing
  void Print(std::ostream &out, bool show_all) const;

  /// \brief Stream operator; forwards to Print() with show_all set to false
  friend std::ostream &operator<<(std::ostream &out, const DataBuffer &cb) {
    cb.Print(out, false);
    return out;
  }

  // Flag queries: each one reports whether the corresponding bit is set in the flag mask
  bool eof() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagEOF)) != 0u; }

  bool eoe() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagEOE)) != 0u; }

  bool wait() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagWait)) != 0u; }

  bool quit() const { return (static_cast<uint32_t>(buffer_flags_) & static_cast<uint32_t>(kDeBFlagQuit)) != 0u; }

  // Plain accessors
  int32_t id() const { return buffer_id_; }

  void set_id(int32_t id) { buffer_id_ = id; }

  /// \brief Number of rows in the tensor table; 0 when no table is attached
  int32_t NumRows() const {
    if (!tensor_table_) {
      return 0;
    }
    return static_cast<int32_t>(tensor_table_->size());
  }

  /// \brief Number of tensors in the first row; 0 when no table is attached or the table is empty
  int32_t NumCols() const {
    if (tensor_table_ == nullptr || tensor_table_->empty()) {
      return 0;
    }
    return static_cast<int32_t>(tensor_table_->front().size());
  }

  BufferFlags buffer_flags() const { return buffer_flags_; }

  // Remove me!! Callers should fetch rows via pop
  Status GetTensor(std::shared_ptr<Tensor> *ptr, int32_t row_id, int32_t col_id) const;

  // Remove me!! Callers should drain rows via pop.
  Status GetRow(int32_t row_id, TensorRow *ptr) const;

  /// \brief Take the first row out of the tensor table
  Status PopRow(TensorRow *ptr);

  /// \brief Remove number_of_rows rows from the back of the tensor table
  Status SliceOff(int64_t number_of_rows);

  /// \brief Replace the tensor table; the unique_ptr assignment releases the previous table
  void set_tensor_table(std::unique_ptr<TensorQTable> new_table) { tensor_table_ = std::move(new_table); }

  /// \brief OR the given flag bits into the existing flag mask (bits already set stay set)
  void set_flag(BufferFlags in_flag) {
    const uint32_t merged = static_cast<uint32_t>(buffer_flags_) | static_cast<uint32_t>(in_flag);
    buffer_flags_ = static_cast<BufferFlags>(merged);
  }

  void Shuffle() {}  // does nothing right now. possibly remove later

 protected:
  int32_t buffer_id_;                           // An id for the buffer.
  std::unique_ptr<TensorQTable> tensor_table_;  // A table (row major) of Tensors
  BufferFlags buffer_flags_;                    // bit mask for various buffer properties
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATA_BUFFER_H_

@ -19,7 +19,7 @@
#include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/engine/datasetops/dataset_op.h"

@ -17,7 +17,7 @@
#include <iomanip>
#include <utility>
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/db_connector.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/global_context.h"

@ -28,7 +28,6 @@
namespace mindspore {
namespace dataset {
// Forward declare
class DataBuffer;
class ExecutionTree;
// BarrierOp class implements the Barrier operator. It will block sending of rows until a signal has

@ -21,7 +21,7 @@
#ifdef ENABLE_PYTHON
#include "minddata/dataset/core/pybind_support.h"
#endif
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/db_connector.h"
#include "minddata/dataset/kernels/data/data_utils.h"
#include "minddata/dataset/util/status.h"

@ -34,7 +34,6 @@
namespace mindspore {
namespace dataset {
class DataBuffer;
using PadInfo = std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>>;

@ -32,7 +32,6 @@
namespace mindspore {
namespace dataset {
class DataBuffer;
class BucketBatchByLengthOp : public PipelineOp {
public:

@ -94,11 +94,9 @@ Status CacheBase::FetchSamplesToWorkers() {
keys.reserve(1);
std::vector<row_id_type> prefetch_keys;
prefetch_keys.reserve(prefetch_size_);
std::unique_ptr<DataBuffer> sampler_buffer;
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
while (!sampler_buffer->eoe()) {
TensorRow sample_row;
RETURN_IF_NOT_OK(sampler_buffer->PopRow(&sample_row));
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
while (!sample_row.eoe()) {
std::shared_ptr<Tensor> sample_ids = sample_row[0];
for (auto itr = sample_ids->begin<int64_t>(); itr != sample_ids->end<int64_t>(); itr++) {
++row_cnt_;
@ -115,7 +113,7 @@ Status CacheBase::FetchSamplesToWorkers() {
prefetch_keys.clear();
}
}
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sampler_buffer));
RETURN_IF_NOT_OK(sampler_->GetNextSample(&sample_row));
}
// Deal with any partial keys left.
if (!prefetch_keys.empty()) {

@ -95,7 +95,7 @@ void CacheLookupOp::SamplerPrint(std::ostream &out, bool show_all) const {
// Then add our own info if any
}
}
Status CacheLookupOp::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
Status CacheLookupOp::GetNextSample(TensorRow *out) {
std::vector<row_id_type> cache_miss;
RETURN_IF_NOT_OK(keys_miss_->Pop(0, &cache_miss));
// Ignore the case we have no cache miss, we can't return empty samples.
@ -104,19 +104,16 @@ Status CacheLookupOp::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
}
// Special code for eoe
if (cache_miss.at(0) == eoe_row_id) {
*out_buffer = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE);
*out = std::move(TensorRow(TensorRow::kFlagEOE));
} else {
std::shared_ptr<Tensor> sample_ts;
RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ts, cache_miss.size()));
(*out_buffer) = std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagNone);
auto idPtr = sample_ts->begin<int64_t>();
for (auto i = 0; i < cache_miss.size(); ++i) {
*idPtr = cache_miss.at(i);
++idPtr;
}
TensorRow row;
row.push_back(sample_ts);
(*out_buffer)->set_tensor_table(std::make_unique<TensorQTable>(1, row));
*out = {sample_ts};
}
return Status::OK();
}

@ -96,7 +96,7 @@ class CacheLookupOp : public CacheBase, public SamplerRT {
Status ResetSampler() override;
Status HandshakeRandomAccessOp(const RandomAccessOp *op) override;
Status InitSampler() override;
Status GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) override;
Status GetNextSample(TensorRow *out) override;
void Print(std::ostream &out, bool show_all) const override;
void SamplerPrint(std::ostream &out, bool show_all) const override;
bool AllowCacheMiss() override { return true; }

@ -22,7 +22,7 @@
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/datasetops/repeat_op.h"
#include "minddata/dataset/engine/dataset_iterator.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/task_manager.h"

@ -19,7 +19,7 @@
#include <utility>
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/db_connector.h"
#include "utils/ms_utils.h"

@ -26,7 +26,7 @@
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/engine/datasetops/device_queue_op.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/db_connector.h"
#ifndef ENABLE_ANDROID
#include "utils/system/crc32c.h"

@ -59,8 +59,6 @@ constexpr char kZipOp[] = "ZipOp";
// Forward declare
class ExecutionTree;
class DataBuffer;
class NodePass;
class SamplerRT;

@ -19,7 +19,7 @@
#include <algorithm>
#include <iostream>
#include <memory>
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/dataset_iterator.h"
#include "minddata/dataset/util/status.h"
#include "minddata/dataset/util/task_manager.h"

@ -18,7 +18,7 @@
#include <utility>
#include "minddata/dataset/engine/datasetops/epoch_ctrl_op.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/util/log_adapter.h"
namespace mindspore {

@ -22,7 +22,7 @@
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/kernels/tensor_op.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/task_manager.h"

@ -23,7 +23,7 @@
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/include/constants.h"
#include "minddata/dataset/core/global_context.h"
#include "minddata/dataset/engine/data_buffer.h"
#include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h"
#include "minddata/dataset/engine/datasetops/map_op/gpu_map_job.h"
#include "minddata/dataset/engine/execution_tree.h"

@ -34,7 +34,6 @@
namespace mindspore {
namespace dataset {
// Forward declare
class DataBuffer;
class ExecutionTree;
// MapOp class implements the Map operator. It will apply a list of operations to each record specified by column names.

@ -30,8 +30,6 @@ namespace dataset {
constexpr int32_t kEndOfActions = -1;
// Forward declares
class DataBuffer;
class DbConnector;
// A ParallelOp provides a multi-threaded DatasetOp

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save