singleop support memcpy

pull/1313/head
chuxing 4 years ago
parent f8d6c5c421
commit 1bec70d529

@ -344,6 +344,7 @@ set(TRAIN_SRC_LIST
"single_op/task/op_task.cc"
"single_op/task/tbe_task_builder.cc"
"single_op/task/aicpu_task_builder.cc"
"single_op/task/rts_kernel_task_builder.cc"
"single_op/task/aicpu_kernel_task_builder.cc"
"hybrid/common/tensor_value.cc"
"hybrid/common/npu_memory_allocator.cc"

@ -30,6 +30,7 @@
#include "runtime/rt.h"
#include "task/aicpu_task_builder.h"
#include "task/aicpu_kernel_task_builder.h"
#include "task/rts_kernel_task_builder.h"
#include "task/tbe_task_builder.h"
#include "hybrid/executor/hybrid_model_executor.h"
#include "hybrid/node_executor/node_executor.h"
@ -248,7 +249,9 @@ Status SingleOpModel::ParseInputsAndOutputs() {
for (auto &op_desc : data_ops_) {
GE_CHK_STATUS_RET_NOLOG(ParseInputNode(op_desc));
}
ParseOutputNode(netoutput_op_);
if (netoutput_op_ != nullptr) {
ParseOutputNode(netoutput_op_);
}
return SUCCESS;
}
@ -330,6 +333,17 @@ Status SingleOpModel::BuildTaskList(StreamResource *stream_resource, SingleOp &s
aicpu_task->SetModelArgs(model_name_, model_id_);
ParseArgTable(aicpu_task, single_op);
single_op.tasks_.emplace_back(aicpu_task);
} else if (task_type == RT_MODEL_TASK_MEMCPY_ASYNC || task_type == RT_MODEL_TASK_MEMCPY_ADDR_ASYNC) {
auto kernel_def = task_def.memcpy_async();
auto node = op_list_[kernel_def.op_index()];
GE_CHECK_NOTNULL(node);
auto op_desc = node->GetOpDesc();
GE_CHECK_NOTNULL(op_desc);
std::unique_ptr<MemcpyAsyncTask> task;
GE_CHK_STATUS_RET_NOLOG(RtsKernelTaskBuilder::BuildMemcpyAsyncTask(op_desc, kernel_def, model_params_, task));
task->SetModelArgs(model_name_, model_id_);
ParseArgTable(task.get(), single_op);
single_op.tasks_.emplace_back(task.release());
} else {
// skip
GELOGD("Skip task type: %d", static_cast<int>(task_type));

@ -22,6 +22,7 @@
#include <memory>
#include <string>
#include <vector>
#include <single_op/task/op_task.h>
#include "common/helper/model_helper.h"
#include "single_op/single_op.h"

@ -35,6 +35,7 @@ namespace ge {
namespace {
constexpr int kLaunchRetryTimes = 1000;
constexpr int kSleepTime = 10;
constexpr size_t kMemcpyArgCount = 2;
constexpr uint64_t kReleaseFlag = 1;
constexpr int kCopyNum = 2;
constexpr uint64_t kInferSessionId = 0;
@ -911,4 +912,16 @@ void AiCpuCCTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
arg_base = io_addr_;
arg_count = io_addr_num_;
}
// Issues the asynchronous copy described by this task on the given stream.
// addresses_[0] is used as the copy source and addresses_[1] as the copy
// destination; dst_max_, count_ and kind_ are forwarded to rtMemcpyAsync as-is.
// Returns SUCCESS unless GE_CHK_RT_RET returns early on a runtime error.
Status MemcpyAsyncTask::LaunchKernel(rtStream_t stream) {
  void *const source = reinterpret_cast<void *>(addresses_[0]);
  void *const destination = reinterpret_cast<void *>(addresses_[1]);
  GE_CHK_RT_RET(rtMemcpyAsync(destination, dst_max_, source, count_, kind_, stream));
  return SUCCESS;
}
// Hands back the task's address table: arg_base is pointed at addresses_ and
// arg_count is set to kMemcpyArgCount.
void MemcpyAsyncTask::GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) {
  arg_count = kMemcpyArgCount;
  arg_base = addresses_;
}
} // namespace ge

@ -44,6 +44,9 @@ class OpTask {
virtual Status UpdateArgTable(const SingleOpModelParam &param);
void SetModelArgs(std::string model_name, uint32_t model_id);
Status GetProfilingArgs(TaskDescInfo &task_desc_info, uint32_t &model_id);
// Stores op_desc as this task's op description, replacing any previous value.
void SetOpDesc(const OpDescPtr &op_desc) { op_desc_ = op_desc; }
// Returns a const reference to the op description stored on this task.
const OpDescPtr &GetOpdesc() const {
  return op_desc_;
}
Status OpenDump(rtStream_t stream);
virtual void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) = 0;
@ -242,6 +245,22 @@ private:
std::string op_type_;
uint64_t kernel_id_ = 0;
};
// OpTask that performs a single asynchronous memory copy through the runtime.
//
// The fields are private and are filled in by the befriended
// RtsKernelTaskBuilder (and may be touched by SingleOpModel). Every field has
// a default member initializer so that a freshly constructed task never
// carries indeterminate addresses, sizes or copy kind, regardless of how the
// object is created.
class MemcpyAsyncTask : public OpTask {
 public:
  // Launches the copy on `stream` using the stored addresses, sizes and kind.
  Status LaunchKernel(rtStream_t stream) override;
  // Exposes the address table (addresses_) and its entry count to the caller.
  void GetIoAddr(uintptr_t *&arg_base, size_t &arg_count) override;

 private:
  friend class SingleOpModel;
  friend class RtsKernelTaskBuilder;
  // Slot 0 is read as the copy source and slot 1 as the copy destination.
  uintptr_t addresses_[2] = {0U, 0U};
  // Passed to rtMemcpyAsync right after the destination pointer.
  size_t dst_max_ = 0U;
  // Passed to rtMemcpyAsync right after the source pointer.
  size_t count_ = 0U;
  // Copy kind forwarded to the runtime; value-initialized (zero) by default.
  rtMemcpyKind_t kind_{};
  // Not referenced by the member functions declared above.
  // NOTE(review): possibly set by a friend class - confirm before removing.
  NodePtr node_;
};
} // namespace ge
#endif // GE_SINGLE_OP_TASK_OP_TASK_H_

@ -0,0 +1,45 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "rts_kernel_task_builder.h"
#include "build_task_utils.h"
namespace ge {
namespace {
// Number of addresses a memcpy task expects after joining the op's addresses.
constexpr size_t kNumAddresses = 2;
}  // namespace
// Builds a MemcpyAsyncTask from a memcpy task definition.
//
// op_desc     Op description attached to the task and used to resolve the
//             addresses; must not be null.
// kernel_def  Supplies dst_max, count and kind for the copy.
// param       Forwarded to BuildTaskUtils::GetAddresses.
// task        [out] Reset to a newly allocated task once op_desc has been
//             validated. Note that it stays set to the partially built task
//             when INTERNAL_ERROR is returned.
//
// Returns SUCCESS when exactly kNumAddresses addresses were resolved,
// INTERNAL_ERROR for any other address count, or the status produced by
// GE_CHECK_NOTNULL when op_desc is null or the allocation fails.
Status RtsKernelTaskBuilder::BuildMemcpyAsyncTask(const OpDescPtr &op_desc,
                                                  const domi::MemcpyAsyncDef &kernel_def,
                                                  const SingleOpModelParam &param,
                                                  std::unique_ptr<MemcpyAsyncTask> &task) {
  // Reject a null op description before anything is allocated.
  GE_CHECK_NOTNULL(op_desc);
  task.reset(new (std::nothrow) MemcpyAsyncTask());
  GE_CHECK_NOTNULL(task);
  task->SetOpDesc(op_desc);
  task->dst_max_ = kernel_def.dst_max();
  task->count_ = kernel_def.count();
  task->kind_ = static_cast<rtMemcpyKind_t>(kernel_def.kind());
  // keep_workspace is passed as false; the joined list must hold exactly
  // kNumAddresses entries, which are stored in order as slot 0 and slot 1.
  auto addresses = BuildTaskUtils::JoinAddresses(BuildTaskUtils::GetAddresses(op_desc, param, false));
  if (addresses.size() != kNumAddresses) {
    GELOGE(INTERNAL_ERROR, "[Build][MemcpyAsyncTask] Invalid address count: %zu", addresses.size());
    return INTERNAL_ERROR;
  }
  task->addresses_[0] = reinterpret_cast<uintptr_t>(addresses[0]);
  task->addresses_[1] = reinterpret_cast<uintptr_t>(addresses[1]);
  return SUCCESS;
}
} // namespace ge

@ -0,0 +1,34 @@
/**
* Copyright 2019-2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_
#define GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_
#include <vector>
#include "graph/op_desc.h"
#include "single_op/single_op.h"
#include "single_op/single_op_model.h"
namespace ge {
// Static-only builder for RTS kernel tasks used by single-op execution.
// NOTE(review): this header uses std::unique_ptr but directly includes only
// <vector>; presumably <memory> arrives through the project headers above -
// consider including it explicitly.
class RtsKernelTaskBuilder {
public:
// Creates a MemcpyAsyncTask from `kernel_def` for the op described by
// `op_desc`, using `param` to resolve addresses, and returns it via `task`.
// Returns a Status indicating whether the task was built.
static Status BuildMemcpyAsyncTask(const OpDescPtr &op_desc,
const domi::MemcpyAsyncDef &kernel_def,
const SingleOpModelParam &param,
std::unique_ptr<MemcpyAsyncTask> &task);
};
} // namespace ge
#endif // GE_SINGLE_OP_TASK_RTS_KERNEL_TASK_BUILDER_H_

@ -25,6 +25,7 @@
#define private public
#include "single_op/single_op_model.h"
#include "single_op/task/tbe_task_builder.h"
#include "single_op/task/rts_kernel_task_builder.h"
#undef private
#undef protected
@ -223,3 +224,26 @@ TEST_F(UtestSingleOpModel, test_build_dynamic_op) {
model.BuildDynamicOp(res, dynamic_single_op);
}
// Checks RtsKernelTaskBuilder::BuildMemcpyAsyncTask in two situations:
//  1. an op with one input ("x") and one output ("y") builds successfully;
//  2. after the input offsets become {0, 0} and a second output desc ("y2") is
//     added, the builder reports INTERNAL_ERROR.
TEST_F(UtestSingleOpModel, test_build_memcpy_task) {
  // Op under test: a memcpy op with a single input and a single output.
  auto memcpy_op = std::make_shared<OpDesc>(MEMCPYASYNC, MEMCPYASYNC);
  GeShape shape({2});
  GeTensorDesc tensor_desc(shape);
  memcpy_op->AddInputDesc("x", tensor_desc);
  memcpy_op->AddOutputDesc("y", tensor_desc);
  memcpy_op->SetInputOffset({0});
  memcpy_op->SetOutputOffset({0});

  // Task definition carrying the copy sizes and kind.
  domi::MemcpyAsyncDef memcpy_def;
  memcpy_def.set_dst_max(8);
  memcpy_def.set_count(8);
  memcpy_def.set_kind(2);

  SingleOpModelParam param{};
  std::unique_ptr<MemcpyAsyncTask> task;

  // Case 1: the builder accepts the one-input / one-output op.
  auto status = RtsKernelTaskBuilder::BuildMemcpyAsyncTask(memcpy_op, memcpy_def, param, task);
  ASSERT_EQ(status, SUCCESS);

  // Case 2: the modified op must be rejected.
  memcpy_op->SetInputOffset({0, 0});
  memcpy_op->AddOutputDesc("y2", tensor_desc);
  status = RtsKernelTaskBuilder::BuildMemcpyAsyncTask(memcpy_op, memcpy_def, param, task);
  ASSERT_EQ(status, INTERNAL_ERROR);
}

Loading…
Cancel
Save