You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
6.9 KiB
182 lines
6.9 KiB
/**
|
|
* Copyright 2019 Huawei Technologies Co., Ltd
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
#include "dataset/engine/datasetops/rename_op.h"
|
|
#include <iomanip>
|
|
#include <vector>
|
|
#include <utility>
|
|
#include <unordered_map>
|
|
|
|
#include "dataset/core/config_manager.h"
|
|
#include "dataset/core/constants.h"
|
|
#include "dataset/core/global_context.h"
|
|
#include "dataset/engine/data_buffer.h"
|
|
#include "dataset/engine/db_connector.h"
|
|
#include "dataset/engine/opt/pass.h"
|
|
#include "utils/log_adapter.h"
|
|
|
|
namespace mindspore {
|
|
namespace dataset {
|
|
// builds
|
|
RenameOp::Builder::Builder() {
|
|
// Some arguments to the RenameOp constructor have a default argument that is taken
|
|
// from the client config.
|
|
// The user may choose to change these values for the construction of the RenameOp by
|
|
// using the various builder set methods.
|
|
|
|
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
|
builder_op_connector_size_ = cfg->op_connector_size();
|
|
}
|
|
|
|
Status RenameOp::Builder::SanityCheck() const { return Status::OK(); }
|
|
|
|
// build method for RenameOp
|
|
Status RenameOp::Builder::Build(std::shared_ptr<RenameOp> *ptr) {
|
|
RETURN_IF_NOT_OK(SanityCheck());
|
|
*ptr = std::make_shared<RenameOp>(builder_in_columns_, builder_out_columns_, builder_op_connector_size_);
|
|
return Status::OK();
|
|
}
|
|
|
|
// constructor
|
|
RenameOp::RenameOp(const std::vector<std::string> &in_col_names, const std::vector<std::string> &out_col_names,
|
|
int32_t op_connector_size)
|
|
: PipelineOp(op_connector_size), in_columns_(in_col_names), out_columns_(out_col_names) {}
|
|
|
|
// destructor
|
|
RenameOp::~RenameOp() {}
|
|
|
|
// main entry point for rename
|
|
Status RenameOp::operator()() {
|
|
TaskManager::FindMe()->Post();
|
|
std::unique_ptr<DataBuffer> curr_buffer;
|
|
RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
|
|
if (curr_buffer->buffer_flags() != DataBuffer::kDeBFlagNone) {
|
|
RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
|
|
std::string err_msg = "Rename first buffer got was control signal";
|
|
// if 1st eoe or eof, pass it on then return
|
|
RETURN_STATUS_UNEXPECTED(err_msg);
|
|
}
|
|
|
|
// First, populate the column map from the input child.
|
|
// This will not be the final map for output from this op.
|
|
RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild());
|
|
// core rename functionality only needs to happen once, to identify the new column names/indexes
|
|
RETURN_IF_NOT_OK(RenameColumns());
|
|
|
|
while (curr_buffer->eof() == false) {
|
|
while (curr_buffer->eoe() == false) {
|
|
// push the renamed input buffer
|
|
MS_LOG(DEBUG) << "Rename operator pushing next buffer.";
|
|
RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(curr_buffer)));
|
|
RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
|
|
} // end of while eoe loop
|
|
|
|
// we got eoe, now try again until we get eof
|
|
MS_LOG(DEBUG) << "Rename operator EOE Received.";
|
|
RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOE))));
|
|
MS_LOG(DEBUG) << "Rename operator fetching buffer after EOE.";
|
|
RETURN_IF_NOT_OK(GetNextInput(&curr_buffer));
|
|
} // end of while eof loop
|
|
|
|
MS_LOG(DEBUG) << "Rename opeerator EOF Received.";
|
|
RETURN_IF_NOT_OK(out_connector_->Add(0, std::move(std::make_unique<DataBuffer>(0, DataBuffer::kDeBFlagEOF))));
|
|
return Status::OK();
|
|
}
|
|
|
|
// renames the columns
|
|
Status RenameOp::RenameColumns() {
|
|
// iterate over my index in input vector, find the corresponding position
|
|
std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
|
|
// parameter for input check
|
|
size_t found = 0;
|
|
|
|
// iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
|
|
// by doing it this way we recreate a new ColNameIdMap and allow for switching
|
|
for (const auto &pair : column_name_id_map_) {
|
|
std::string name = pair.first;
|
|
int32_t id = pair.second;
|
|
// find name
|
|
std::vector<std::string>::iterator it;
|
|
it = std::find(in_columns_.begin(), in_columns_.end(), name);
|
|
// for c input checks here we have to count the number of times we find the stuff in in_columns_
|
|
// because we iterate over the mInputList n times
|
|
if (it != in_columns_.end()) {
|
|
// found
|
|
found += 1;
|
|
int index = std::distance(in_columns_.begin(), it);
|
|
MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
|
|
|
|
new_col_name_id_map[out_columns_[index]] = id;
|
|
} else {
|
|
// not found
|
|
MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id.";
|
|
new_col_name_id_map[name] = id;
|
|
}
|
|
}
|
|
// only checks number of renamed columns have been found, this input check doesn't check everything
|
|
if (found != in_columns_.size()) {
|
|
MS_LOG(DEBUG) << "Rename operator column names found: " << found << " out of " << in_columns_.size() << ".";
|
|
std::string err_msg = "Renamed column doesn't exist in dataset";
|
|
RETURN_STATUS_UNEXPECTED(err_msg);
|
|
}
|
|
|
|
// Now, overwrite our column map with the new renamed columns/id's
|
|
column_name_id_map_ = new_col_name_id_map;
|
|
return Status::OK();
|
|
}
|
|
|
|
// prints rename
|
|
void RenameOp::Print(std::ostream &out, // In: The output stream to print to
|
|
bool show_all) const { // In: T/F if it should print everything
|
|
// Always show the id and name as first line regardless if this summary or detailed print
|
|
out << "(" << std::setw(2) << operator_id_ << ") <RenameOp>:";
|
|
if (!show_all) {
|
|
// Call the super class for displaying any common 1-liner info
|
|
PipelineOp::Print(out, show_all);
|
|
// Then show any custom derived-internal 1-liner info for this op
|
|
out << "\n";
|
|
} else {
|
|
// Call the super class for displaying any common detailed info
|
|
PipelineOp::Print(out, show_all);
|
|
// Then show any custom derived-internal stuff
|
|
out << "\nIn columns:";
|
|
for (size_t i = 0; i < in_columns_.size(); ++i) {
|
|
out << "\n " << in_columns_[i];
|
|
}
|
|
for (size_t i = 0; i < out_columns_.size(); ++i) {
|
|
out << "\n " << out_columns_[i];
|
|
}
|
|
out << "\n\n";
|
|
}
|
|
}
|
|
|
|
Status RenameOp::EofReceived(int32_t) {
|
|
MS_LOG(DEBUG) << "Rename operator EOF received, do nothing now.";
|
|
return Status::OK();
|
|
}
|
|
|
|
Status RenameOp::EoeReceived(int32_t) {
|
|
state_ = OpState::kDeOpIdle;
|
|
return Status::OK();
|
|
}
|
|
|
|
// Visitor accept method for NodePass
|
|
Status RenameOp::Accept(NodePass *p, bool *modified) {
|
|
// Downcast shared pointer then call visitor
|
|
return p->RunOnNode(std::static_pointer_cast<RenameOp>(shared_from_this()), modified);
|
|
}
|
|
} // namespace dataset
|
|
} // namespace mindspore
|