diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs
index 6070c4d10c..2fc1024c5e 100644
--- a/mindspore/lite/schema/ops.fbs
+++ b/mindspore/lite/schema/ops.fbs
@@ -376,7 +376,7 @@ table BNGradInput {
     channels: int;
 }
 table Scale {
-    format: Format = 0;
+    axis: int;
 }
 
 table Eltwise {
diff --git a/mindspore/lite/src/ops/nchw2nhwc.cc b/mindspore/lite/src/ops/nchw2nhwc.cc
index bd5f27b86a..5a420ceba8 100644
--- a/mindspore/lite/src/ops/nchw2nhwc.cc
+++ b/mindspore/lite/src/ops/nchw2nhwc.cc
@@ -28,12 +28,16 @@ int Nchw2Nhwc::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
   std::vector<int> nchw_shape = input->shape();
-  std::vector<int> nhwc_shape{nchw_shape};
-  nhwc_shape[NHWC_N] = nchw_shape[NCHW_N];
-  nhwc_shape[NHWC_H] = nchw_shape[NCHW_H];
-  nhwc_shape[NHWC_W] = nchw_shape[NCHW_W];
-  nhwc_shape[NHWC_C] = nchw_shape[NCHW_C];
-  output->set_shape(nhwc_shape);
+  if (nchw_shape.size() != 4) {
+    output->set_shape(nchw_shape);
+  } else {
+    std::vector<int> nhwc_shape{nchw_shape};
+    nhwc_shape[NHWC_N] = nchw_shape[NCHW_N];
+    nhwc_shape[NHWC_H] = nchw_shape[NCHW_H];
+    nhwc_shape[NHWC_W] = nchw_shape[NCHW_W];
+    nhwc_shape[NHWC_C] = nchw_shape[NCHW_C];
+    output->set_shape(nhwc_shape);
+  }
   output->SetFormat(schema::Format_NHWC);
   output->set_data_type(input->data_type());
   return RET_OK;
diff --git a/mindspore/lite/src/ops/nhwc2nchw.cc b/mindspore/lite/src/ops/nhwc2nchw.cc
index 049b1a4a18..579ce71be2 100644
--- a/mindspore/lite/src/ops/nhwc2nchw.cc
+++ b/mindspore/lite/src/ops/nhwc2nchw.cc
@@ -28,15 +28,18 @@ int Nhwc2Nchw::InferShape(std::vector<tensor::Tensor *> inputs_, std::vector<tensor::Tensor *> outputs_) {
   auto output = outputs_.front();
   MS_ASSERT(output != nullptr);
   std::vector<int> nhwc_shape = input->shape();
-  std::vector<int> nchw_shape{nhwc_shape};
-  nchw_shape[NCHW_N] = nhwc_shape[NHWC_N];
-  nchw_shape[NCHW_C] = nhwc_shape[NHWC_C];
-  nchw_shape[NCHW_H] = nhwc_shape[NHWC_H];
-  nchw_shape[NCHW_W] = nhwc_shape[NHWC_W];
-  output->set_shape(nchw_shape);
+  if (nhwc_shape.size() != 4) {
+    output->set_shape(nhwc_shape);
+  } else {
+    std::vector<int> nchw_shape{nhwc_shape};
+    nchw_shape[NCHW_N] = nhwc_shape[NHWC_N];
+    nchw_shape[NCHW_C] = nhwc_shape[NHWC_C];
+    nchw_shape[NCHW_H] = nhwc_shape[NHWC_H];
+    nchw_shape[NCHW_W] = nhwc_shape[NHWC_W];
+    output->set_shape(nchw_shape);
+  }
   output->SetFormat(schema::Format_NCHW);
   output->set_data_type(input->data_type());
   return RET_OK;
 }
 }  // namespace mindspore::lite
-
diff --git a/mindspore/lite/src/populate_parameter.cc b/mindspore/lite/src/populate_parameter.cc
index fabeec8b59..521ae28867 100644
--- a/mindspore/lite/src/populate_parameter.cc
+++ b/mindspore/lite/src/populate_parameter.cc
@@ -753,15 +753,7 @@ OpParameter *PopulateScaleParameter(const lite::Primitive *primitive) {
     MS_LOG(ERROR) << "value_as_Scale return nullptr";
     return nullptr;
   }
-  // NCHW todo use enum
-  if (param->format() == schema::Format_NCHW) {
-    scale_param->axis_ = 1;
-    scale_param->num_axis_ = 1;
-  } else if (param->format() == schema::Format_NHWC) {
-    scale_param->axis_ = 3;
-    scale_param->num_axis_ = 1;
-  }
-
+  scale_param->axis_ = param->axis();
   return reinterpret_cast<OpParameter *>(scale_param);
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
index ca8611f34b..576c5f3752 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_3x3_fp16.cc
@@ -278,7 +278,7 @@ int Convolution3x3FP16CPUKernel::Run() {
   auto out_tensor = outputs_.at(kOutputIndex);
   auto output_addr = reinterpret_cast<float *>(out_tensor->Data());
   for (int j = 0; j < out_tensor->ElementsNum(); ++j) {
-    output_addr[j] = (float)fp16_out_[j];
+    output_addr[j] = (reinterpret_cast<float16_t *>(fp16_out_))[j];
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
index 2dbafaef21..376baea21e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.cc
@@ -29,85 +29,92 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Scale;
 
 namespace mindspore::kernel {
-namespace {
-constexpr int kScaleInputNum = 1;
-constexpr int kScaleOutputNum = 1;
-}  // namespace
-int ScaleCPUKernel::Init() {
+int ScaleCPUKernel::InitScaleOffset() {
   auto param = reinterpret_cast<ScaleParameter *>(opParameter);
-  auto in_tensor = inputs_.front();
-  auto scale = inputs_.at(1);
-
-  if (inputs_.size() < 2 || inputs_.size() > 3) {
-    MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << inputs_.size() << " is given.";
-    return RET_ERROR;
+  auto scale_tensor = inputs_.at(1);
+  float *scale_ptr = reinterpret_cast<float *>(inputs_.at(1)->Data());
+  if (scale_ptr != nullptr) {
+    scale_ = reinterpret_cast<float *>(malloc(scale_tensor->ElementsNum() * sizeof(float)));
+    if (scale_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+    memcpy(scale_, scale_ptr, scale_tensor->ElementsNum() * sizeof(float));
+  } else {
+    scale_ = nullptr;
   }
-  if (param->axis_ < 0) {
-    MS_LOG(ERROR) << "axis illegal.";
-    return RET_ERROR;
+  if (inputs_.size() == 3) {
+    auto offset_tensor = inputs_.at(2);
+    offset_ = reinterpret_cast<float *>(malloc(offset_tensor->ElementsNum() * sizeof(float)));
+    if (offset_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+    memcpy(offset_, offset_tensor->Data(), offset_tensor->ElementsNum() * sizeof(float));
+    param->has_offset_ = true;
+  } else {
+    offset_ = nullptr;
+    param->has_offset_ = false;
   }
-  if (param->num_axis_ < 1 || param->num_axis_ + param->axis_ >= in_tensor->shape().size()) {
-    MS_LOG(ERROR) << "number of axis illegal";
+  return RET_OK;
+}
+
+int ScaleCPUKernel::InitParameter() {
+  auto param = reinterpret_cast<ScaleParameter *>(opParameter);
+  auto in_tensor = inputs_.at(0);
+  auto in_shape = in_tensor->shape();
+  auto scale_tensor = inputs_.at(1);
+  auto scale_shape = scale_tensor->shape();
+
+  if (scale_shape.size() + param->axis_ > in_shape.size()) {
+    MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
     return RET_ERROR;
   }
-
-  param->channel_ = 1;
-  param->out_count_ = 1;
-  param->in_stride_ = 1;
-  int cur_axis;
-  for (cur_axis = 0; cur_axis < param->axis_; cur_axis++) {
-    param->out_count_ *= in_tensor->shape()[cur_axis];
+  param->outer_size_ = 1;
+  param->axis_size_ = 1;
+  param->inner_size_ = 1;
+  for (int i = 0; i < param->axis_; i++) {
+    param->outer_size_ *= in_shape[i];
   }
-  for (int i = 0; i < param->num_axis_; i++) {
-    param->channel_ *= in_tensor->shape()[(cur_axis++)];
+  for (size_t i = 0; i < scale_shape.size(); i++) {
+    if (in_shape[i + param->axis_] != scale_shape[i]) {
+      MS_LOG(ERROR) << "Scale tensor shape is incorrect.";
+      return RET_ERROR;
+    }
+    param->axis_size_ *= in_shape[i + param->axis_];
   }
-  for (int i = cur_axis; i < in_tensor->shape().size(); i++) {
-    param->in_stride_ *= in_tensor->shape()[cur_axis];
+  for (size_t i = param->axis_ + scale_shape.size(); i < in_shape.size(); i++) {
+    param->inner_size_ *= in_shape[i];
   }
-  if (scale->shape().back() != param->channel_ || scale->shape().size() > 2) {
-    MS_LOG(ERROR) << "scale shape illegal.";
+  return RET_OK;
+}
+
+int ScaleCPUKernel::Init() {
+  if (inputs_.size() < 2 || inputs_.size() > 3) {
+    MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << inputs_.size() << " is given.";
     return RET_ERROR;
   }
-  if (inputs_.size() == 3) {
-    if ((inputs_.at(2))->shape().back() != param->channel_ || (inputs_.at(2))->shape().size() > 2) {
-      MS_LOG(ERROR) << "offset shape illegal.";
-      return RET_ERROR;
-    }
-  }
-  input_ptr_ = reinterpret_cast<float *>(inputs_.front()->Data());
-  scale_ = reinterpret_cast<float *>(inputs_.at(1)->Data());
-  if (inputs_.size() == 3) {
-    offset_ = reinterpret_cast<float *>(inputs_.at(2)->Data());
-    has_offset_ = true;
-  } else {
-    offset_ = nullptr;
-    has_offset_ = false;
+  auto ret = InitParameter();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitParameter failed.";
+    return RET_ERROR;
   }
-  output_ptr_ = reinterpret_cast<float *>(outputs_.front()->Data());
-  num_unit_ = param->out_count_ * param->channel_;
-  unit_size_ = param->in_stride_;
-  thread_n_num_ = MSMIN(thread_num_, num_unit_);
-  thread_n_stride_ = UP_DIV(num_unit_, thread_n_num_);
+  ret = InitScaleOffset();
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
+int ScaleCPUKernel::ReSize() { return RET_OK; }
+
 int ScaleCPUKernel::Scale(int task_id) {
-  int num_unit_thread = MSMIN(thread_n_stride_, num_unit_ - task_id * thread_n_stride_);
-  if (num_unit_thread <= 0) {
-    return RET_OK;
-  }
-  int thread_offset = task_id * thread_n_stride_;
-  int ret;
-  if (has_offset_) {
-    ret = DoScale(input_ptr_, output_ptr_, scale_, offset_, thread_offset, num_unit_thread,
-                  reinterpret_cast<ScaleParameter *>(opParameter));
-  } else {
-    ret = DoScale(input_ptr_, output_ptr_, scale_, thread_offset, num_unit_thread,
-                  reinterpret_cast<ScaleParameter *>(opParameter));
-  }
+  auto ret =
+    DoScale(input_ptr_, output_ptr_, scale_, offset_, task_id, reinterpret_cast<ScaleParameter *>(opParameter));
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error task_id[" << task_id << "] error_code[" << ret << "]";
@@ -116,11 +123,9 @@ int ScaleCPUKernel::Scale(int task_id) {
   return RET_OK;
 }
 
-int ScaleCPUKernel::ReSize() { return RET_OK; }
-
 int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
-  auto g_kernel = reinterpret_cast<ScaleCPUKernel *>(cdata);
-  auto ret = g_kernel->Scale(task_id);
+  auto scale = reinterpret_cast<ScaleCPUKernel *>(cdata);
+  auto ret = scale->Scale(task_id);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ScaleRun error task_id[" << task_id << "] error_code[" << ret << "]";
     return RET_ERROR;
@@ -129,7 +134,16 @@ int ScaleRun(int task_id, LiteParallelGroupEnv *penv, void *cdata) {
 }
 
 int ScaleCPUKernel::Run() {
-  int ret = LiteBackendParallelLaunch(ScaleRun, this, thread_n_num_);
+  auto in_tensor = inputs_.front();
+  input_ptr_ = reinterpret_cast<float *>(in_tensor->Data());
+  if (scale_ == nullptr) {
+    auto scale_tensor = inputs_[1];
+    scale_ = reinterpret_cast<float *>(scale_tensor->Data());
+  }
+  auto out_tensor = outputs_.front();
+  output_ptr_ = reinterpret_cast<float *>(out_tensor->Data());
+
+  int ret = LiteBackendParallelLaunch(ScaleRun, this, opParameter->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale error error_code[" << ret << "]";
     return RET_ERROR;
@@ -160,7 +174,6 @@ kernel::LiteKernel *CpuScaleFp32KernelCreator(const std::vector<lite::tensor::Tensor *> &inputs,
   }
   return kernel;
 }
 
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Scale, CpuScaleFp32KernelCreator)
 }  // namespace mindspore::kernel
-
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale.h
@@ -25,27 +25,24 @@ class ScaleCPUKernel : public LiteKernel {
  public:
   ScaleCPUKernel(OpParameter *parameter, const std::vector<lite::tensor::Tensor *> &inputs,
                  const std::vector<lite::tensor::Tensor *> &outputs, const lite::Context *ctx)
-      : LiteKernel(parameter, inputs, outputs), thread_num_(ctx->thread_num_) {}
+      : LiteKernel(parameter, inputs, outputs) {
+    opParameter->thread_num_ = ctx->thread_num_;
+  }
   ~ScaleCPUKernel() override = default;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
+  int InitParameter();
+  int InitScaleOffset();
   int Scale(int task_id);
 
  private:
-  int thread_num_;
-  int thread_n_stride_;
-  int thread_n_num_;
-  int num_unit_;
-  int unit_size_;
   float *input_ptr_;
   float *scale_;
   float *offset_;
   float *output_ptr_;
-  bool has_offset_;
 };
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_SCALE_H_
-
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/scale.cc b/mindspore/lite/src/runtime/kernel/arm/opclib/scale.cc
index df2700c09d..bedc15212b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/scale.cc
@@ -17,37 +17,36 @@
 
 #include "src/runtime/kernel/arm/opclib/scale.h"
 #include "src/runtime/kernel/arm/opclib/errorcode.h"
 
-int DoScale(float *in_data, float *out_data, float *scale, float *offset, int units_offset, int num_unit,
-            ScaleParameter *scale_param) {
-  if (in_data == nullptr || out_data == nullptr || scale == nullptr || offset == nullptr || scale_param == nullptr) {
+int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param) {
+  if (in_data == nullptr || out_data == nullptr || scale == nullptr || scale_param == nullptr) {
     return OPCLIB_ERR;
   }
+  if (scale_param->has_offset_ && offset == nullptr) {
+    return OPCLIB_ERR;
+  }
-  int in_stride_j = units_offset * scale_param->in_stride_;
-  for (int j = units_offset; j < units_offset + num_unit; j++) {
-    int channel = j % scale_param->channel_;
-    for (int k = 0; k < scale_param->in_stride_; k++) {
-      out_data[in_stride_j + k] = in_data[in_stride_j + k] * scale[channel] + offset[channel];
+  if (scale_param->has_offset_) {
+    for (int out = task_id; out < scale_param->outer_size_; out += scale_param->op_parameter_.thread_num_) {
+      int out_offset = out * scale_param->axis_size_ * scale_param->inner_size_;
+      for (int i = 0; i < scale_param->axis_size_; i++) {
+        int axis_offset = out_offset + i * scale_param->inner_size_;
+        for (int in = 0; in < scale_param->inner_size_; in++) {
+          int in_offset = axis_offset + in;
+          out_data[in_offset] = in_data[in_offset] * scale[i] + offset[i];
+        }
+      }
     }
-    in_stride_j = in_stride_j + scale_param->in_stride_;
-  }
-  return OPCLIB_OK;
-}
-
-int DoScale(float *in_data, float *out_data, float *scale, int units_offset, int num_unit,
-            ScaleParameter *scale_param) {
-  if (in_data == nullptr || out_data == nullptr || scale == nullptr || scale_param == nullptr) {
-    return OPCLIB_ERR;
-  }
-
-  int in_stride_j = units_offset * scale_param->in_stride_;
-  for (int j = units_offset; j < units_offset + num_unit; j++) {
-    int channel = j % scale_param->channel_;
-    for (int k = 0; k < scale_param->in_stride_; k++) {
-      out_data[in_stride_j + k] = in_data[in_stride_j + k] * scale[channel];
+  } else {
+    for (int out = task_id; out < scale_param->outer_size_; out += scale_param->op_parameter_.thread_num_) {
+      int out_offset = out * scale_param->axis_size_ * scale_param->inner_size_;
+      for (int i = 0; i < scale_param->axis_size_; i++) {
+        int axis_offset = out_offset + i * scale_param->inner_size_;
+        for (int in = 0; in < scale_param->inner_size_; in++) {
+          int in_offset = axis_offset + in;
+          out_data[in_offset] = in_data[in_offset] * scale[i];
+        }
+      }
     }
-    in_stride_j = in_stride_j + scale_param->in_stride_;
   }
   return OPCLIB_OK;
 }
-
diff --git a/mindspore/lite/src/runtime/kernel/arm/opclib/scale.h b/mindspore/lite/src/runtime/kernel/arm/opclib/scale.h
index 077fb5ae57..01189ab0e3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/opclib/scale.h
+++ b/mindspore/lite/src/runtime/kernel/arm/opclib/scale.h
@@ -21,15 +21,13 @@
 
 struct ScaleParameter {
   OpParameter op_parameter_;
-  int out_count_;
-  int channel_;
-  int in_stride_;
+  int outer_size_;
+  int axis_size_;
+  int inner_size_;
   int axis_;
-  int num_axis_;
+  bool has_offset_;
+  // todo yangruoqi: axis
 };
 
-int DoScale(float *in_data, float *out_data, float *scale, float *offset, int units_offset, int num_unit,
-            ScaleParameter *scale_param);
-int DoScale(float *in_data, float *out_data, float *scale, int units_offset, int num_unit, ScaleParameter *scale_param);
+int DoScale(float *in_data, float *out_data, float *scale, float *offset, int task_id, ScaleParameter *scale_param);
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_OPCLIB_SCALE_H_
-
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_scale_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_scale_parser.cc
index d2199f2abd..83198c2702 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_scale_parser.cc
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_scale_parser.cc
@@ -22,12 +22,9 @@ const int32_t DIM_DEFAULT_SIZE = 4;
 
 namespace mindspore {
 namespace lite {
-STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto,
-                               const caffe::LayerParameter &weight,
-                               schema::CNodeT *op,
-                               std::vector<schema::TensorT *> *weightVec) {
+STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto, const caffe::LayerParameter &weight,
+                               schema::CNodeT *op, std::vector<schema::TensorT *> *weightVec) {
   std::unique_ptr<schema::ScaleT> attr(new schema::ScaleT());
-  attr->format = schema::Format_NCHW;
 
   if (weight.blobs_size() + weight.bottom_size() < 2) {
     // MS_LOGE("Scale bottom size:%d, blobs size:%d invalid in layer %s", weight.bottom_size(), weight.blobs_size(),
@@ -36,12 +33,15 @@ STATUS CaffeScaleParser::Parse(const caffe::LayerParameter &proto,
   }
 
   const caffe::ScaleParameter scaleParam = weight.scale_param();
-  int32_t axis = scaleParam.axis();  // NCHW_DIM_C;
-  uint32_t axis_index = NCHW_DIM_C;
-
-  if (GetAxisIndex(axis, &axis_index)) {
-    // MS_LOGE("scale get axis failed for layer %s.", weight.name().c_str());
+  int axis = NCHW_DIM_C;
+  if (scaleParam.has_axis()) {
+    uint32_t axis_index = NCHW_DIM_C;
+    if (GetAxisIndex(scaleParam.axis(), &axis_index)) {
+      // MS_LOGE("scale get axis failed for layer %s.", weight.name().c_str());
+    }
+    axis = axis_index;
   }
+  attr->axis = axis;
 
   // parse scale
   // todo expect only weight as scale not bias
@@ -94,4 +94,3 @@ STATUS CaffeScaleParser::GetAxisIndex(const int32_t &axis, uint32_t *axis_index) {
 CaffeNodeRegistrar g_caffeScaleParser("Scale", new CaffeScaleParser());
 }  // namespace lite
 }  // namespace mindspore
-
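
Reviewer note: the patch replaces the format-derived (axis_, num_axis_) configuration with an explicit axis attribute and an (outer_size_, axis_size_, inner_size_) decomposition, and it parallelizes DoScale by striding task_id over the outer dimension instead of pre-splitting units across threads. The standalone sketch below is not part of the patch (ScaleSketch and its parameter names are illustrative); it mirrors the loop structure of the new DoScale so the indexing can be checked in isolation:

#include <cstdio>
#include <vector>

// Mirrors the new DoScale loop nest: element (o, i, k) of a tensor viewed as
// [outer_size, axis_size, inner_size] is scaled by scale[i] (plus offset[i] when given).
// Thread `task_id` handles outer indices task_id, task_id + thread_num, task_id + 2 * thread_num, ...
void ScaleSketch(const float *in, float *out, const float *scale, const float *offset, int outer_size, int axis_size,
                 int inner_size, int task_id, int thread_num) {
  for (int o = task_id; o < outer_size; o += thread_num) {
    int out_offset = o * axis_size * inner_size;
    for (int i = 0; i < axis_size; i++) {
      int axis_offset = out_offset + i * inner_size;
      for (int k = 0; k < inner_size; k++) {
        out[axis_offset + k] = in[axis_offset + k] * scale[i] + (offset != nullptr ? offset[i] : 0.0f);
      }
    }
  }
}

int main() {
  // Input viewed as {2, 3, 2} with the scale on the middle axis: outer=2, axis=3, inner=2.
  std::vector<float> in(12, 1.0f), out(12, 0.0f);
  float scale[3] = {1.0f, 2.0f, 3.0f};
  ScaleSketch(in.data(), out.data(), scale, nullptr, 2, 3, 2, /*task_id=*/0, /*thread_num=*/1);
  for (float v : out) printf("%g ", v);  // prints: 1 1 2 2 3 3 1 1 2 2 3 3
  printf("\n");
  return 0;
}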
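ScaleCPUKernel::InitParameter derives those three sizes from the input shape, the scale tensor shape, and the axis attribute: dimensions before axis fold into outer_size_, the scale dimensions (which must match the input dimensions starting at axis) fold into axis_size_, and the remaining dimensions fold into inner_size_. A hypothetical helper reproducing that bookkeeping (ComputeScaleSizes is not in the patch):

#include <cstdio>
#include <vector>

// Reproduces the shape bookkeeping of ScaleCPUKernel::InitParameter: the scale tensor
// must match a contiguous run of input dimensions starting at `axis`.
bool ComputeScaleSizes(const std::vector<int> &in_shape, const std::vector<int> &scale_shape, int axis,
                       int *outer_size, int *axis_size, int *inner_size) {
  if (axis < 0 || scale_shape.size() + axis > in_shape.size()) {
    return false;  // scale tensor does not fit into the input shape at this axis
  }
  *outer_size = 1;
  *axis_size = 1;
  *inner_size = 1;
  for (int i = 0; i < axis; i++) *outer_size *= in_shape[i];
  for (size_t i = 0; i < scale_shape.size(); i++) {
    if (in_shape[i + axis] != scale_shape[i]) return false;  // dimension mismatch
    *axis_size *= in_shape[i + axis];
  }
  for (size_t i = axis + scale_shape.size(); i < in_shape.size(); i++) *inner_size *= in_shape[i];
  return true;
}

int main() {
  int outer = 0, axis_sz = 0, inner = 0;
  // NHWC input {1, 4, 4, 8} with a per-channel scale {8} on axis 3.
  if (ComputeScaleSizes({1, 4, 4, 8}, {8}, 3, &outer, &axis_sz, &inner)) {
    printf("outer=%d axis=%d inner=%d\n", outer, axis_sz, inner);  // outer=16 axis=8 inner=1
  }
  return 0;
}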