@@ -26,9 +26,6 @@ using mindspore::schema::PrimitiveType_EmbeddingLookup;
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
-  embedding_lookup_parameter_ = reinterpret_cast<EmbeddingLookupParameter *>(op_parameter_);
-  embedding_lookup_parameter_->thread_num = thread_count_;
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -36,24 +33,24 @@ int EmbeddingLookupCPUKernel::Init() {
 }
 
 int EmbeddingLookupCPUKernel::ReSize() {
-  embedding_lookup_parameter_->ids_size_ = in_tensors_.back()->ElementsNum();
-  embedding_lookup_parameter_->layer_size_ = 1;
+  param_->ids_size_ = in_tensors_.back()->ElementsNum();
+  param_->layer_size_ = 1;
   auto in_shape = in_tensors_.front()->shape();
   for (size_t i = 1; i < in_shape.size(); ++i) {
-    embedding_lookup_parameter_->layer_size_ *= in_shape[i];
+    param_->layer_size_ *= in_shape[i];
   }
-  embedding_lookup_parameter_->layer_num_ = 0;
+  param_->layer_num_ = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; ++i) {
-    embedding_lookup_parameter_->layer_num_ += in_tensors_[i]->shape()[0];
+    param_->layer_num_ += in_tensors_[i]->shape()[0];
   }
   return RET_OK;
 }
 
 int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
-  int error_code = EmbeddingLookup(input_addr_, ids_addr_, output_addr_, embedding_lookup_parameter_, task_id);
+  auto ids_addr = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
+  auto output_addr = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
+  int error_code = EmbeddingLookup(input_addr_, ids_addr, output_addr, param_, task_id);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "embedding lookup error error_code[" << error_code << "]";
     return RET_ERROR;
@@ -62,8 +59,8 @@ int EmbeddingLookupCPUKernel::DoExcute(int task_id) {
 }
 
 int EmbeddingLookupRun(void *cdata, int task_id) {
-  auto EmbeddingLookupData = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
-  auto ret = EmbeddingLookupData->DoExcute(task_id);
+  auto kernel = reinterpret_cast<EmbeddingLookupCPUKernel *>(cdata);
+  auto ret = kernel->DoExcute(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookupRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -73,39 +70,38 @@ int EmbeddingLookupRun(void *cdata, int task_id) {
 }
 
 int EmbeddingLookupCPUKernel::Run() {
   MS_ASSERT(context_->allocator != nullptr);
-  input_addr_ = reinterpret_cast<float *>(context_->allocator->Malloc(
-    sizeof(float) * embedding_lookup_parameter_->layer_size_ * embedding_lookup_parameter_->layer_num_));
-  embedding_lookup_parameter_->is_regulated_ =
-    reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * embedding_lookup_parameter_->layer_num_));
-  if (input_addr_ == nullptr || embedding_lookup_parameter_->is_regulated_ == nullptr) {
+  input_addr_ =
+    reinterpret_cast<float *>(context_->allocator->Malloc(sizeof(float) * param_->layer_size_ * param_->layer_num_));
+  param_->is_regulated_ = reinterpret_cast<bool *>(context_->allocator->Malloc(sizeof(bool) * param_->layer_num_));
+  if (input_addr_ == nullptr || param_->is_regulated_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
-    context_->allocator->Free(input_addr_);
-    context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+    FreeRunBuff();
     return RET_ERROR;
   }
-  for (int i = 0; i < embedding_lookup_parameter_->layer_num_; ++i) {
-    embedding_lookup_parameter_->is_regulated_[i] = embedding_lookup_parameter_->max_norm_ == 0;
+  for (int i = 0; i < param_->layer_num_; ++i) {
+    param_->is_regulated_[i] = param_->max_norm_ == 0;
   }
   int dest_loc = 0;
   for (size_t i = 0; i < in_tensors_.size() - 1; i++) {
     auto input_t = reinterpret_cast<float *>(in_tensors_.at(i)->MutableData());
     memcpy(input_addr_ + dest_loc, input_t, sizeof(float) * in_tensors_.at(i)->ElementsNum());
     dest_loc += in_tensors_.at(i)->ElementsNum();
   }
-  output_addr_ = reinterpret_cast<float *>(out_tensors_.front()->MutableData());
-  ids_addr_ = reinterpret_cast<int *>(in_tensors_.back()->MutableData());
-
-  auto ret =
-    ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, embedding_lookup_parameter_->thread_num);
-  context_->allocator->Free(input_addr_);
-  context_->allocator->Free(embedding_lookup_parameter_->is_regulated_);
+  auto ret = ParallelLaunch(this->context_->thread_pool_, EmbeddingLookupRun, this, op_parameter_->thread_num_);
+  FreeRunBuff();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "EmbeddingLookup error: error_code[" << ret << "]";
   }
   return ret;
 }
 
+void EmbeddingLookupCPUKernel::FreeRunBuff() {
+  context_->allocator->Free(input_addr_);
+  context_->allocator->Free(param_->is_regulated_);
+  input_addr_ = nullptr;
+  param_->is_regulated_ = nullptr;
+}
+
 kernel::LiteKernel *CpuEmbeddingLookupFp32KernelCreator(const std::vector<lite::Tensor *> &inputs,
                                                         const std::vector<lite::Tensor *> &outputs,
                                                         OpParameter *parameter, const lite::InnerContext *ctx,