diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 4918355e2a..f4806af9b3 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -219,6 +219,9 @@ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/internal) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/nnacl) if (ENABLE_TOOLS) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) + if (SUPPORT_TRAIN) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/net_train) + endif() endif() if (NOT WIN32) if (ENABLE_TOOLS) diff --git a/mindspore/lite/include/train_model.h b/mindspore/lite/include/train_model.h index c93cd342da..e1a3366761 100644 --- a/mindspore/lite/include/train_model.h +++ b/mindspore/lite/include/train_model.h @@ -18,32 +18,36 @@ #include #include "include/model.h" -namespace mindspore::lite { +namespace mindspore { +namespace lite { + +/// \brief TrainModel defines a class that allows importing and exporting a MindSpore trainable model struct TrainModel : public lite::Model { - /// \brief Static method to create a TrainModel pointer. - /// - /// \param[in] model_buf Define the buffer read from a model file. - /// \param[in] size Define bytes number of model buffer. + /// \brief Static method to create a TrainModel object /// - /// \return Pointer of MindSpore Lite TrainModel. + /// \param[in] model_buf A buffer that was read from a MS model file + /// \param[in] size Length of the buffer + /// + /// \return Pointer to MindSpore Lite TrainModel static TrainModel *Import(const char *model_buf, size_t size); - /// \brief Free meta graph temporary buffer + /// \brief Free meta graph related data void Free() override; - /// \brief TrainModel destruct, free all memory + /// \brief Class destructor, free all memory virtual ~TrainModel(); - /// \brief Export Model into buf. + /// \brief Export Model into a buffer /// - /// \param[in] buf Define the buffer to Export into. If nullptr, buf will be allocated - /// \param[in] len size of the buffer. + /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated + /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer /// /// \return Pointer to buffer with exported model - char* ExportBuf(char* buf, size_t* len) const; + char *ExportBuf(char *buf, size_t *len) const; size_t buf_size_; }; -} // namespace mindspore::lite +} // namespace lite +} // namespace mindspore #endif // MINDSPORE_LITE_INCLUDE_TRAIN_MODEL_H_
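
A usage note on the revised ExportBuf contract (an illustrative sketch only, not part of the patch; ExportModelCopy is a hypothetical helper): len is an in/out parameter, and passing buf == nullptr asks ExportBuf to allocate the output buffer itself.

#include "include/train_model.h"

// Round-trip a serialized model through TrainModel.
char *ExportModelCopy(const char *model_buf, size_t size, size_t *out_len) {
  mindspore::lite::TrainModel *model = mindspore::lite::TrainModel::Import(model_buf, size);
  if (model == nullptr) return nullptr;
  *out_len = 0;
  // buf == nullptr: ExportBuf allocates; *out_len receives the exported size.
  char *exported = model->ExportBuf(nullptr, out_len);
  delete model;  // the virtual destructor releases the model's memory
  return exported;
}
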
diff --git a/mindspore/lite/include/train_session.h b/mindspore/lite/include/train_session.h index c3cab39d61..fbe00bc8fb 100644 --- a/mindspore/lite/include/train_session.h +++ b/mindspore/lite/include/train_session.h @@ -25,16 +25,59 @@ namespace mindspore { namespace session { +/// \brief TrainSession defines a class that allows training a MindSpore model class TrainSession : public session::LiteSession { public: + /// \brief Class destructor virtual ~TrainSession() = default; + + /// \brief Static method to create a TrainSession object + /// + /// \param[in] context Defines the context of the session to be created + /// + /// \return Pointer to MindSpore Lite TrainSession static TrainSession *CreateSession(lite::Context *context); + /// \brief Compile MindSpore Lite train model + /// + /// \note CompileTrainGraph should be called before RunGraph + /// + /// \param[in] model Defines the model to be compiled + /// + /// \return STATUS as an error code of compiling graph, STATUS is defined in errorcode.h virtual int CompileTrainGraph(lite::TrainModel *model) = 0; + + /// \brief Export the trained model into a buffer + /// + /// \param[in] buf The buffer to Export into. If equal to nullptr, buf will be allocated + /// \param[in,out] len Size of the pre-allocated buffer, and returned size of the exported buffer + /// + /// \return Pointer to the exported buffer virtual void *ExportToBuf(char *buf, size_t *len) const = 0; - virtual void Train() = 0; + + /// \brief Save the trained model into a flatbuffer file + /// + /// \param[in] filename Filename to save flatbuffer to + /// + /// \return 0 on success or -1 in case of error virtual int SaveToFile(const std::string &filename) const = 0; + + /// \brief Set model to train mode + /// \return STATUS as an error code of the operation, STATUS is defined in errorcode.h virtual int Train() = 0; + + /// \brief Check whether the model is in train mode + /// + /// \return Boolean indicating whether the model is in train mode bool IsTrain() { return train_mode_ == true; } - virtual void Eval() = 0; + + /// \brief Set model to eval mode + /// \return STATUS as an error code of the operation, STATUS is defined in errorcode.h virtual int Eval() = 0; + + /// \brief Check whether the model is in eval mode + /// + /// \return Boolean indicating whether the model is in eval mode bool IsEval() { return train_mode_ == false; } protected: diff --git a/mindspore/lite/minddata/CMakeLists.txt b/mindspore/lite/minddata/CMakeLists.txt index 2e01d699fd..576c829b35a 100644 --- a/mindspore/lite/minddata/CMakeLists.txt +++ b/mindspore/lite/minddata/CMakeLists.txt @@ -270,11 +270,13 @@ if (BUILD_MINDDATA STREQUAL "full") ${CORE_DIR}/utils/ms_utils.cc ) + find_package(Threads REQUIRED) target_link_libraries(minddata-lite securec jpeg-turbo jpeg mindspore::json + Threads::Threads ) # ref: https://github.com/android/ndk/issues/1202 diff --git a/mindspore/lite/nnacl/fp32/batchnorm.c b/mindspore/lite/nnacl/fp32/batchnorm.c index 0e2449b1b8..a11138daca 100644 --- a/mindspore/lite/nnacl/fp32/batchnorm.c +++ b/mindspore/lite/nnacl/fp32/batchnorm.c @@ -55,20 +55,30 @@ void FusedBatchNormFp32(const
void *input, const void *scale, const void *offset void FusedBatchNormFp32MeanVar(const float *input, float *run_mean, float *run_var, BatchNormParameter *param, float *save_mean, float *save_var) { - float N = (float)param->unit_; + const float N = (float)param->unit_; + const float VN = N; + const float VNUB = (N > 1.0f) ? (N - 1.0f) : 1.0f; + const float momentum = (1.0f - param->momentum_); + for (int i = 0; i < param->unit_; i++) { for (int c = 0; c < param->channel_; c++) { int idx = i * param->channel_ + c; run_mean[c] += input[idx]; - run_var[c] += input[idx] * input[idx]; } } - const float VN = (N > 1.0f) ? (N - 1.0f) : 1.0f; for (int c = 0; c < param->channel_; c++) { - run_mean[c] = run_mean[c] / N; - run_var[c] = run_var[c] / VN - run_mean[c] * run_mean[c]; - save_mean[c] = param->momentum_ * save_mean[c] + (1 - param->momentum_) * run_mean[c]; - const float var = run_var[c]; - save_var[c] = param->momentum_ * save_var[c] + (1 - param->momentum_) * var; + run_mean[c] /= N; + } + for (int i = 0; i < param->unit_; i++) { + for (int c = 0; c < param->channel_; c++) { + int idx = i * param->channel_ + c; + run_var[c] += (input[idx] - run_mean[c]) * (input[idx] - run_mean[c]); + } + } + for (int c = 0; c < param->channel_; c++) { + float unbiased_var = (run_var[c] / VNUB); + run_var[c] = (run_var[c] / VN); + save_mean[c] = momentum * save_mean[c] + (1.0f - momentum) * run_mean[c]; + save_var[c] = momentum * save_var[c] + (1.0f - momentum) * unbiased_var; } } diff --git a/mindspore/lite/nnacl/fp32_grad/activation_grad.c b/mindspore/lite/nnacl/fp32_grad/activation_grad.c index d7b070473d..df20d9447e 100644 --- a/mindspore/lite/nnacl/fp32_grad/activation_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/activation_grad.c @@ -72,7 +72,7 @@ int HSwishGrad(float *src0, float *src1, int length, float *dst) { int HSigmoidGrad(float *src0, float *src1, int length, float *dst) { for (int i = 0; i < length; ++i) { - float tmp = (src1[i] > 3.0f ? 1.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); + float tmp = (src1[i] > 3.0f ? 0.0f : (src1[i] < -3.0f ? 0.0f : 1.0f / 6.0f)); dst[i] = tmp * src0[i]; } return NNACL_OK; diff --git a/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c b/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c index 6e5b56ecf5..4c0ebe628b 100644 --- a/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.c @@ -15,6 +15,8 @@ */ #include "nnacl/fp32_grad/arithmetic_grad.h" +#include +#include "nnacl/fp32_grad/utils.h" void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size) { for (int i = 0; i < element_size; i++) { @@ -27,3 +29,103 @@ void ElementMulAndDivNegSquare(const float *a, const float *b, const float *deno output[i] = -a[i] * b[i] / (denom[i] * denom[i]); } } + +void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, + const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) { + int num_output0 = 1; + int num_output1 = 1; + int same_shape = 1; + for (int idx = 0; idx < num_dims; ++idx) { + num_output0 *= input0_dims[idx]; + num_output1 *= input1_dims[idx]; + if (input0_dims[idx] != input1_dims[idx]) { + same_shape = 0; + } + } + + if (same_shape) { + int input_iter[8] = {0}; + + // Iterate through input_data. + do { + size_t offset = GetInputOffset(num_dims, input0_dims, input_iter); + output0[offset] = input0[offset] > input1[offset] ? dy[offset] : 0.; + output1[offset] = input1[offset] >= input0[offset] ? 
dy[offset] : 0.; + } while (NextIndex(num_dims, input0_dims, input_iter)); + } else { + memset(output0, 0, num_output0 * sizeof(float)); // zero output + memset(output1, 0, num_output1 * sizeof(float)); // zero output + + int input_iter[8] = {0}; + int axes0[5] = {0}; + int axes1[5] = {0}; + int num_axes0 = 0; + int num_axes1 = 0; + for (int i = 0; i < num_dims; i++) { + if (input0_dims[i] == 1) { + axes0[num_axes0++] = i; + } + if (input1_dims[i] == 1) { + axes1[num_axes1++] = i; + } + } + + do { + size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0); + size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1); + size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter); + output0[offset0] += input0[offset0] > input1[offset1] ? dy[yt_offset] : 0.; + output1[offset1] += input1[offset1] >= input0[offset0] ? dy[yt_offset] : 0.; + } while (NextIndex(num_dims, dy_dims, input_iter)); + } +} + +void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, + const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims) { + int num_output0 = 1; + int num_output1 = 1; + int same_shape = 1; + for (int idx = 0; idx < num_dims; ++idx) { + num_output0 *= input0_dims[idx]; + num_output1 *= input1_dims[idx]; + if (input0_dims[idx] != input1_dims[idx]) { + same_shape = 0; + } + } + + if (same_shape) { + int input_iter[8] = {0}; + + // Iterate through input_data. + do { + size_t offset = GetInputOffset(num_dims, input0_dims, input_iter); + output0[offset] = input0[offset] < input1[offset] ? dy[offset] : 0.; + output1[offset] = input1[offset] <= input0[offset] ? dy[offset] : 0.; + } while (NextIndex(num_dims, input0_dims, input_iter)); + } else { + memset(output0, 0, num_output0 * sizeof(float)); // zero output + memset(output1, 0, num_output1 * sizeof(float)); // zero output + + int input_iter[8] = {0}; + int axes0[5] = {0}; + int axes1[5] = {0}; + int num_axes0 = 0; + int num_axes1 = 0; + for (int i = 0; i < num_dims; i++) { + if (input0_dims[i] == 1) { + axes0[num_axes0++] = i; + } + if (input1_dims[i] == 1) { + axes1[num_axes1++] = i; + } + } + + do { + size_t offset0 = GetOutputOffset(num_dims, input0_dims, input_iter, num_axes0, axes0); + size_t offset1 = GetOutputOffset(num_dims, input1_dims, input_iter, num_axes1, axes1); + size_t yt_offset = GetInputOffset(num_dims, input0_dims, input_iter); + output0[offset0] += input0[offset0] < input1[offset1] ? dy[yt_offset] : 0.; + output1[offset1] += input1[offset1] <= input0[offset0] ? 
dy[yt_offset] : 0.; + } while (NextIndex(num_dims, dy_dims, input_iter)); + } +} diff --git a/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h b/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h index 34486ab7fa..da78a314ef 100644 --- a/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h +++ b/mindspore/lite/nnacl/fp32_grad/arithmetic_grad.h @@ -16,11 +16,17 @@ #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_ #define MINDSPORE_LITE_NNACL_FP32_GRAD_ARITHMETIC_GRAD_H_ +#include "nnacl/op_base.h" + #ifdef __cplusplus extern "C" { #endif void ElementDivNegSquare(const float *nom, const float *denom, float *output, int element_size); void ElementMulAndDivNegSquare(const float *a, const float *b, const float *denom, float *output, int element_size); +void MaximumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, + const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims); +void MinimumByAxes(const float *input0, const float *input1, const float *dy, const int *input0_dims, + const int *input1_dims, const int *dy_dims, float *output0, float *output1, int num_dims); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32_grad/batch_norm.c b/mindspore/lite/nnacl/fp32_grad/batch_norm.c index a353dbc85f..d2f489dc52 100644 --- a/mindspore/lite/nnacl/fp32_grad/batch_norm.c +++ b/mindspore/lite/nnacl/fp32_grad/batch_norm.c @@ -17,66 +17,55 @@ #include #include "nnacl/fp32_grad/batch_norm.h" -void sumSpatialBatch(const float *in, int size, int ch, float *out) { +void sumSpatialBatch(const float *in, size_t size, int ch, float *out) { memset(out, 0, ch * sizeof(float)); - for (int i = 0; i < size; i++) { - const float *ptr = in + i * ch; - for (int c = 0; c < ch; c++) { + for (size_t i = 0; i < size; i++) { + const float *ptr = in + (i * ch); + for (size_t c = 0; c < ch; c++) { out[c] += ptr[c]; } } } -static void meanVar(const float *in, int size, int ch, float eps, float *mean, float *invar) { - float N = (float)(size); - sumSpatialBatch(in, N, ch, mean); - for (int f = 0; f < ch; ++f) { - mean[f] /= N; - } - for (int f = 0; f < ch; f++) { - float tvar = 0; - for (int i = 0; i < N; i++) { - float x = in[i * ch + f]; - tvar += (x - mean[f]) * (x - mean[f]); - } - invar[f] = 1.0f / (sqrt(tvar / N + eps)); - } -} - -void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, - float *mean, float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) { - meanVar(in, size, channels, eps, mean, invar); - for (int i = 0; i < size; i++) { - for (int f = 0; f < channels; f++) { - int ix = i * channels + f; +void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean, + float *invar, float *dxhathat_sum, float *dxhat_sum, float *out) { + const float N = (size); + for (size_t i = 0; i < size; i++) { + for (size_t f = 0; f < channels; f++) { + size_t ix = i * channels + f; float x_hat = (in[ix] - mean[f]) * invar[f]; - float dxhat = dout[ix] * scale[f]; - dxhat_sum[f] += dxhat; - dxhathat_sum[f] += dxhat * x_hat; + float dx_hat = dout[ix] * scale[f]; + dxhat_sum[f] += dx_hat; + dxhathat_sum[f] += dx_hat * x_hat; } } - for (int i = 0; i < size; i++) { - for (int f = 0; f < channels; f++) { - int ix = i * channels + f; + for (size_t i = 0; i < size; i++) { + for (size_t f = 0; f < channels; f++) { + size_t ix = i * channels + f; float x_hat = (in[ix] - mean[f]) * invar[f]; - float dxhat = dout[ix] * scale[f]; - out[ix] = 
1.f / size * invar[f] * (size * dxhat - dxhat_sum[f] - x_hat * dxhathat_sum[f]); + float dx_hat = dout[ix] * scale[f]; + out[ix] = 1.0f / N * (invar[f]) * (N * dx_hat - dxhat_sum[f] - x_hat * dxhathat_sum[f]); } } } -void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, - int n, int size, float *scale_updates) { - int i, b, f; +void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, + int size, float *scale_updates) { + size_t i, b, f; memset(scale_updates, 0, n * sizeof(float)); for (b = 0; b < batch; ++b) { for (i = 0; i < size; ++i) { for (f = 0; f < n; ++f) { int index = (b * size + i) * n + f; float x_norm = (x[index] - mean[f]) * invar[f]; - scale_updates[f] += delta[index] * x_norm; + scale_updates[f] += (delta[index] * x_norm); } } } } +void var2Invar(float *save_var, size_t size, float eps) { + for (size_t i = 0; i < size; i++) { + save_var[i] = 1.0f / sqrt(save_var[i] + eps); + } +} diff --git a/mindspore/lite/nnacl/fp32_grad/batch_norm.h b/mindspore/lite/nnacl/fp32_grad/batch_norm.h index 307610b724..53cc6437da 100644 --- a/mindspore/lite/nnacl/fp32_grad/batch_norm.h +++ b/mindspore/lite/nnacl/fp32_grad/batch_norm.h @@ -29,11 +29,12 @@ typedef struct BNGradParameter { extern "C" { #endif -void sumSpatialBatch(const float *in, int size, int ch, float *out); -void backwardX(const float *in, const float *dout, const float *scale, const int size, int channels, float eps, - float *mean, float *invar, float *xhat_sum, float *dxhat_sum, float *out); -void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, - int n, int size, float *scale_updates); +void sumSpatialBatch(const float *in, size_t size, int ch, float *out); +void backwardX(const float *in, const float *dout, const float *scale, const size_t size, int channels, float *mean, + float *invar, float *xhat_sum, float *dxhat_sum, float *out); +void backwardScale(const float *x, const float *mean, const float *invar, const float *delta, int batch, int n, + int size, float *scale_updates); +void var2Invar(float *save_var, size_t size, float eps); #ifdef __cplusplus } diff --git a/mindspore/lite/nnacl/fp32_grad/dropout_grad.c b/mindspore/lite/nnacl/fp32_grad/dropout_grad.c new file mode 100644 index 0000000000..b54924bada --- /dev/null +++ b/mindspore/lite/nnacl/fp32_grad/dropout_grad.c @@ -0,0 +1,23 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "nnacl/fp32_grad/dropout_grad.h" + +void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float scale) { + for (int i = 0; i < length; i++) { + output_ptr[i] = yt_ptr[i] * mask[i] * scale; + } +} diff --git a/mindspore/lite/nnacl/fp32_grad/dropout_grad.h b/mindspore/lite/nnacl/fp32_grad/dropout_grad.h new file mode 100644 index 0000000000..1124eb29f6 --- /dev/null +++ b/mindspore/lite/nnacl/fp32_grad/dropout_grad.h @@ -0,0 +1,31 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ +#define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ + +#include "nnacl/op_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void DropoutGrad(const float *yt_ptr, const float *mask, float *output_ptr, int length, float ratio); +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_GRAD_H_ diff --git a/mindspore/lite/nnacl/fp32_grad/dropout_parameter.h b/mindspore/lite/nnacl/fp32_grad/dropout_parameter.h new file mode 100644 index 0000000000..789254abae --- /dev/null +++ b/mindspore/lite/nnacl/fp32_grad/dropout_parameter.h @@ -0,0 +1,27 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ +#define MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ + +#include "nnacl/op_base.h" + +typedef struct DropoutParameter { + OpParameter op_parameter_; + float ratio_; +} DropoutParameter; + +#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_DROPOUT_PARAMETER_H_ diff --git a/mindspore/lite/nnacl/fp32_grad/gemm.c b/mindspore/lite/nnacl/fp32_grad/gemm.c index 0ec25c4141..92a718cef6 100644 --- a/mindspore/lite/nnacl/fp32_grad/gemm.c +++ b/mindspore/lite/nnacl/fp32_grad/gemm.c @@ -16,182 +16,536 @@ #include "nnacl/fp32_grad/gemm.h" #include +#ifdef __ARM_NEON +#include +#endif +#include "nnacl/fp32/matmul.h" -static void gemm_not_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, - float *mat_c, int ldc) { - const int block_size = 4; - int block_mod = N % block_size; - int block_c4 = N - block_mod; - - int i, j, k; - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - float a = alpha * mat_a[i * lda + k]; - for (j = 0; j < block_c4; j += block_size) { - float *b = &mat_b[k * ldb + j]; - float *c = &mat_c[i * ldc + j]; - c[0] += a * b[0]; - c[1] += a * b[1]; - c[2] += a * b[2]; - c[3] += a * b[3]; - } - for (; j < N; ++j) { - mat_c[i * ldc + j] += a * mat_b[k * ldb + j]; - } +static void addv(const float *restrict v1, float *restrict v2, float beta, int row, int col, int stride) { + const float *src_ptr = v1; + float *dst_ptr = v2; + for (int r = 0; r < row; r++) { + for (int c = 0; c < col; c++) { + dst_ptr[c] += beta * src_ptr[c]; + } + src_ptr += stride; + dst_ptr += stride; + } +} + +int MatSize(int row, int col, int round) { + int res = UP_ROUND(row, round) * col; + return res; +} + +int MatSizeTotal(int row, int col, int deep, int stride) { +#ifdef ENABLE_ARM32 + const int num = C4NUM; +#else + const int num = C12NUM; +#endif + int res = MatSize(row, deep, num) + MatSize(col, deep, C8NUM); + if (stride > 0) res += row * stride; + return res; +} +#ifdef ENABLE_ARM32 +static void RowMajor2Row4MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { + for (int r = 0; r < row; r++) { + const float *src = src_ptr + r * lead; + for (int c = 0; c < col; c++) { + int cd8 = c / 4; + int cm8 = c % 4; + dst_ptr[cd8 * 4 * row + r * 4 + cm8] = src[c]; + } + } +} +#endif + +static void RowMajor2Row8MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { + for (int r = 0; r < row; r++) { + const float *src = src_ptr + r * lead; + for (int c = 0; c < col; c++) { + int cd8 = c / 8; + int cm8 = c % 8; + dst_ptr[cd8 * 8 * row + r * 8 + cm8] = src[c]; + } + } + return; +} + +#ifndef ENABLE_ARM32 +static void RowMajor2Row12MajorStride(const float *src_ptr, float *dst_ptr, int row, int col, int lead) { + for (int r = 0; r < row; r++) { + const float *src = src_ptr + r * lead; + for (int c = 0; c < col; c++) { + int cd8 = c / C12NUM; + int cm8 = c % C12NUM; + dst_ptr[cd8 * C12NUM * row + r * C12NUM + cm8] = src[c]; } } + return; } -static void gemm_not_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, - float *mat_c, int ldc) { - const int block_size = 4; - int block_mod = K % block_size; - int block_c4 = K - block_mod; - - int i, j, k; - for (i = 0; i < M; ++i) { - for (j = 0; j < N; ++j) { - float sum = 0; - for (k = 0; k < block_c4; k += block_size) { - float *a = &mat_a[i * lda + k]; - float *b = &mat_b[j * ldb + k]; - sum += alpha * a[0] * b[0]; - sum += alpha * a[1] * b[1]; - sum += alpha * a[2] * b[2]; - 
sum += alpha * a[3] * b[3]; +static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { + size_t row_up_12 = UP_ROUND(row, C12NUM); + size_t row12 = row / C12NUM * C12NUM; + size_t col4 = col / C4NUM * C4NUM; + const float *src_r = src_ptr; + float *dst_r = dst_ptr; + + size_t ri = 0; + for (; ri < row12; ri += C12NUM) { + size_t ci = 0; + for (; ci < col4; ci += C4NUM) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C12NUM; + + /* 12x4 row-major to col-major */ +#ifdef ENABLE_ARM64 + size_t stride = lead * sizeof(float); + asm volatile( + "mov x10, %[src_c]\n" + "mov x11, %[dst_c]\n" + + "ld1 {v0.4s}, [x10], %[stride]\n" + "ld1 {v1.4s}, [x10], %[stride]\n" + "ld1 {v2.4s}, [x10], %[stride]\n" + "ld1 {v3.4s}, [x10], %[stride]\n" + + "ld1 {v4.4s}, [x10], %[stride]\n" + "ld1 {v5.4s}, [x10], %[stride]\n" + "ld1 {v6.4s}, [x10], %[stride]\n" + "ld1 {v7.4s}, [x10], %[stride]\n" + + "zip1 v12.4s, v0.4s, v1.4s\n" + "zip2 v13.4s, v0.4s, v1.4s\n" + "zip1 v14.4s, v2.4s, v3.4s\n" + "zip2 v15.4s, v2.4s, v3.4s\n" + + "ld1 {v8.4s}, [x10], %[stride]\n" + "ld1 {v9.4s}, [x10], %[stride]\n" + "ld1 {v10.4s}, [x10], %[stride]\n" + "ld1 {v11.4s}, [x10], %[stride]\n" + + "zip1 v16.4s, v4.4s, v5.4s\n" + "zip2 v17.4s, v4.4s, v5.4s\n" + "zip1 v18.4s, v6.4s, v7.4s\n" + "zip2 v19.4s, v6.4s, v7.4s\n" + + "trn1 v20.2d, v12.2d, v14.2d\n" + "trn2 v23.2d, v12.2d, v14.2d\n" + "trn1 v26.2d, v13.2d, v15.2d\n" + "trn2 v29.2d, v13.2d, v15.2d\n" + + "trn1 v21.2d, v16.2d, v18.2d\n" + "trn2 v24.2d, v16.2d, v18.2d\n" + "trn1 v27.2d, v17.2d, v19.2d\n" + "trn2 v30.2d, v17.2d, v19.2d\n" + + "zip1 v12.4s, v8.4s, v9.4s\n" + "zip2 v13.4s, v8.4s, v9.4s\n" + "zip1 v14.4s, v10.4s, v11.4s\n" + "zip2 v15.4s, v10.4s, v11.4s\n" + + "trn1 v22.2d, v12.2d, v14.2d\n" + "trn2 v25.2d, v12.2d, v14.2d\n" + "trn1 v28.2d, v13.2d, v15.2d\n" + "trn2 v31.2d, v13.2d, v15.2d\n" + + "st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [x11], #64\n" + "st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [x11], #64\n" + "st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [x11], #64\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31"); +#elif ENABLE_ARM32 + size_t stride = lead * sizeof(float); + asm volatile( + "mov r10, %[src_c]\n" + "mov r12, %[dst_c]\n" + + "vld1.32 {q0}, [r10], %[stride]\n" + "vld1.32 {q3}, [r10], %[stride]\n" + "vld1.32 {q10}, [r10], %[stride]\n" + "vld1.32 {q13}, [r10], %[stride]\n" + + "vtrn.32 d0, d6\n" + "vtrn.32 d1, d7\n" + "vtrn.32 d20, d26\n" + "vtrn.32 d21, d27\n" + + "vld1.32 {q1}, [r10], %[stride]\n" + "vld1.32 {q8}, [r10], %[stride]\n" + "vld1.32 {q11}, [r10], %[stride]\n" + "vld1.32 {q14}, [r10], %[stride]\n" + + "vswp d1, d20\n" + "vswp d7, d26\n" + + "vld1.32 {q2}, [r10], %[stride]\n" + "vld1.32 {q9}, [r10], %[stride]\n" + "vld1.32 {q12}, [r10], %[stride]\n" + "vld1.32 {q15}, [r10], %[stride]\n" + + "vtrn.32 d2, d16\n" + "vtrn.32 d3, d17\n" + "vtrn.32 d22, d28\n" + "vtrn.32 d23, d29\n" + + "vswp d3, d22\n" + "vswp d17, d28\n" + + "vtrn.32 d4, d18\n" + "vtrn.32 d5, d19\n" + "vtrn.32 d24, d30\n" + "vtrn.32 d25, d31\n" + + "vswp d5, d24\n" + "vswp d19, d30\n" + + "vst1.32 {q0, q1}, [r12]!\n" + "vst1.32 {q2, q3}, [r12]!\n" + "vst1.32 {q8, q9}, [r12]!\n" + "vst1.32 {q10, q11}, [r12]!\n" + "vst1.32 {q12, q13}, [r12]!\n" + "vst1.32 {q14, q15}, 
[r12]!\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "r10", "r12", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); +#else + for (int tr = 0; tr < C12NUM; tr++) { + for (int tc = 0; tc < C4NUM; tc++) { + dst_c[tc * C12NUM + tr] = src_c[tr * lead + tc]; + } } - for (; k < K; ++k) { - sum += alpha * mat_a[i * lda + k] * mat_b[j * ldb + k]; +#endif + } + for (; ci < col; ci++) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C12NUM; + for (size_t i = 0; i < C12NUM; i++) { + dst_c[i] = src_c[i * lead]; } - mat_c[i * ldc + j] += sum; } + src_r += C12NUM * lead; + dst_r += C12NUM * col; } + + for (; ri < row; ri++) { + for (size_t i = 0; i < col; i++) { + dst_r[i * C12NUM] = src_r[i]; + } + src_r += lead; + dst_r += 1; + } + + for (; ri < row_up_12; ri++) { + for (size_t i = 0; i < col; i++) { + dst_r[i * C12NUM] = 0; + } + dst_r += 1; + } + return; } +#endif + +static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { + size_t row8 = row / C8NUM * C8NUM; +#ifdef ENABLE_ARM64 + size_t col_skip = col / C8NUM * C8NUM; + int skip_size = C8NUM; +#else + size_t col_skip = col / C4NUM * C4NUM; + int skip_size = C4NUM; +#endif + const float *src_r = src_ptr; + float *dst_r = dst_ptr; + + size_t ri = 0; + for (; ri < row8; ri += C8NUM) { + size_t ci = 0; + for (; ci < col_skip; ci += skip_size) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C8NUM; + +#ifdef ENABLE_ARM64 + /* 8x8 row-major to col-major */ + size_t stride = lead * sizeof(float); + asm volatile( + "mov x10, %[src_c]\n" + "mov x11, %[dst_c]\n" + + "ld1 {v0.4s, v1.4s}, [x10], %[stride]\n" + "ld1 {v2.4s, v3.4s}, [x10], %[stride]\n" + "ld1 {v4.4s, v5.4s}, [x10], %[stride]\n" + "ld1 {v6.4s, v7.4s}, [x10], %[stride]\n" + + "zip1 v8.4s, v0.4s, v2.4s\n" + "zip2 v9.4s, v0.4s, v2.4s\n" + "zip1 v10.4s, v4.4s, v6.4s\n" + "zip2 v11.4s, v4.4s, v6.4s\n" + + "ld1 {v16.4s, v17.4s}, [x10], %[stride]\n" + "ld1 {v18.4s, v19.4s}, [x10], %[stride]\n" + "ld1 {v20.4s, v21.4s}, [x10], %[stride]\n" + "ld1 {v22.4s, v23.4s}, [x10], %[stride]\n" + + "zip1 v12.4s, v1.4s, v3.4s\n" + "zip2 v13.4s, v1.4s, v3.4s\n" + "zip1 v14.4s, v5.4s, v7.4s\n" + "zip2 v15.4s, v5.4s, v7.4s\n" + + "trn1 v0.2d, v8.2d, v10.2d\n" + "trn2 v1.2d, v8.2d, v10.2d\n" + "trn1 v2.2d, v9.2d, v11.2d\n" + "trn2 v3.2d, v9.2d, v11.2d\n" + + "zip1 v24.4s, v16.4s, v18.4s\n" + "zip2 v25.4s, v16.4s, v18.4s\n" + "zip1 v26.4s, v20.4s, v22.4s\n" + "zip2 v27.4s, v20.4s, v22.4s\n" + + "trn1 v4.2d, v12.2d, v14.2d\n" + "trn2 v5.2d, v12.2d, v14.2d\n" + "trn1 v6.2d, v13.2d, v15.2d\n" + "trn2 v7.2d, v13.2d, v15.2d\n" + + "zip1 v28.4s, v17.4s, v19.4s\n" + "zip2 v29.4s, v17.4s, v19.4s\n" + "zip1 v30.4s, v21.4s, v23.4s\n" + "zip2 v31.4s, v21.4s, v23.4s\n" + + "trn1 v16.2d, v24.2d, v26.2d\n" + "trn2 v17.2d, v24.2d, v26.2d\n" + "trn1 v18.2d, v25.2d, v27.2d\n" + "trn2 v19.2d, v25.2d, v27.2d\n" + + "trn1 v20.2d, v28.2d, v30.2d\n" + "trn2 v21.2d, v28.2d, v30.2d\n" + "trn1 v22.2d, v29.2d, v31.2d\n" + "trn2 v23.2d, v29.2d, v31.2d\n" + + "st1 {v0.4s}, [x11], #16\n" + "st1 {v16.4s}, [x11], #16\n" + "st1 {v1.4s}, [x11], #16\n" + "st1 {v17.4s}, [x11], #16\n" + "st1 {v2.4s}, [x11], #16\n" + "st1 {v18.4s}, [x11], #16\n" + "st1 {v3.4s}, [x11], #16\n" + "st1 {v19.4s}, [x11], #16\n" + "st1 {v4.4s}, [x11], #16\n" + "st1 {v20.4s}, [x11], #16\n" + "st1 {v5.4s}, [x11], #16\n" + "st1 {v21.4s}, [x11], #16\n" + "st1 {v6.4s}, [x11], #16\n" + "st1 {v22.4s}, [x11], #16\n" + "st1 {v7.4s}, 
[x11], #16\n" + "st1 {v23.4s}, [x11], #16\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "x10", "x11", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", + "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", + "v30", "v31"); +#elif ENABLE_ARM32 + /* 8x4 row-major to col-major */ + size_t stride = col * sizeof(float); + asm volatile( + "mov r10, %[src_c]\n" + "mov r11, %[dst_c]\n" + + "vld1.32 {q0}, [r10], %[stride]\n" + "vld1.32 {q2}, [r10], %[stride]\n" + "vld1.32 {q4}, [r10], %[stride]\n" + "vld1.32 {q6}, [r10], %[stride]\n" + + "vtrn.32 d0, d4\n" + "vtrn.32 d1, d5\n" + "vtrn.32 d8, d12\n" + "vtrn.32 d9, d13\n" + + "vld1.32 {q1}, [r10], %[stride]\n" + "vld1.32 {q3}, [r10], %[stride]\n" + "vld1.32 {q5}, [r10], %[stride]\n" + "vld1.32 {q7}, [r10], %[stride]\n" -static void gemm_trana_not_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, - float *mat_c, int ldc) { - const int block_size = 4; - int block_mod = N % block_size; - int block_c4 = N - block_mod; - - int i, j, k; - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - float a = alpha * mat_a[k * lda + i]; - for (j = 0; j < block_c4; j += block_size) { - float *b = &mat_b[k * ldb + j]; - float *c = &mat_c[i * ldc + j]; - c[0] += a * b[0]; - c[1] += a * b[1]; - c[2] += a * b[2]; - c[3] += a * b[3]; + "vswp d1, d8\n" + "vswp d5, d12\n" + + "vtrn.32 d2, d6\n" + "vtrn.32 d3, d7\n" + "vtrn.32 d10, d14\n" + "vtrn.32 d11, d15\n" + + "vswp d3, d10\n" + "vswp d7, d14\n" + + "vst1.32 {q0, q1}, [r11]!\n" + "vst1.32 {q2, q3}, [r11]!\n" + "vst1.32 {q4, q5}, [r11]!\n" + "vst1.32 {q6, q7}, [r11]!\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); +#else + for (int tr = 0; tr < 8; tr++) { + for (int tc = 0; tc < 4; tc++) { + dst_c[tc * 8 + tr] = src_c[tr * lead + tc]; + } } - for (; j < N; ++j) { - mat_c[i * ldc + j] += a * mat_b[k * ldb + j]; +#endif + } + for (; ci < col; ci++) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C8NUM; + for (size_t i = 0; i < C8NUM; i++) { + dst_c[i] = src_c[i * lead]; } } + src_r += C8NUM * lead; + dst_r += C8NUM * col; } + for (; ri < row; ri++) { + for (size_t i = 0; i < col; i++) { + dst_r[i * C8NUM] = src_r[i]; + } + src_r += lead; + dst_r += 1; + } + return; } +#ifdef ENABLE_ARM32 +static void RowMajor2Col4MajorStride(const float *src_ptr, float *dst_ptr, size_t row, size_t col, int lead) { + size_t row8 = row / C4NUM * C4NUM; + size_t col4 = col / C4NUM * C4NUM; + const float *src_r = src_ptr; + float *dst_r = dst_ptr; + + size_t ri = 0; + for (; ri < row8; ri += C4NUM) { + size_t ci = 0; + for (; ci < col4; ci += C4NUM) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C4NUM; + + /* 4x4 row-major to col-major */ +#ifdef ENABLE_ARM32 + size_t stride = col * 4; + asm volatile( + "mov r10, %[src_c]\n" + "mov r12, %[dst_c]\n" + + "vld1.32 {q0}, [r10], %[stride]\n" + "vld1.32 {q1}, [r10], %[stride]\n" + "vld1.32 {q2}, [r10], %[stride]\n" + "vld1.32 {q3}, [r10], %[stride]\n" + + "vtrn.32 d0, d2\n" + "vtrn.32 d1, d3\n" + "vtrn.32 d4, d6\n" + "vtrn.32 d5, d7\n" -static void gemm_trana_tranb(int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, int ldb, - float *mat_c, int ldc) { - int i, j, k; - const int block_size = 4; - int k_block_mod = K % block_size; - int k_block_c4 = K - k_block_mod; - - int 
m_block_mod = M % block_size; - int m_block_c4 = M - m_block_mod; - - for (i = 0; i < m_block_c4; i += block_size) { - for (j = 0; j < N; ++j) { - float sum0 = 0; - float sum1 = 0; - float sum2 = 0; - float sum3 = 0; - - for (k = 0; k < k_block_c4; k += block_size) { - float *b = &mat_b[j * ldb + k]; - sum0 += alpha * mat_a[i + k * lda] * b[0]; - sum0 += alpha * mat_a[i + (k + 1) * lda] * b[1]; - sum0 += alpha * mat_a[i + (k + 2) * lda] * b[2]; - sum0 += alpha * mat_a[i + (k + 3) * lda] * b[3]; - - sum1 += alpha * mat_a[i + 1 + k * lda] * b[0]; - sum1 += alpha * mat_a[i + 1 + (k + 1) * lda] * b[1]; - sum1 += alpha * mat_a[i + 1 + (k + 2) * lda] * b[2]; - sum1 += alpha * mat_a[i + 1 + (k + 3) * lda] * b[3]; - - sum2 += alpha * mat_a[i + 2 + k * lda] * b[0]; - sum2 += alpha * mat_a[i + 2 + (k + 1) * lda] * b[1]; - sum2 += alpha * mat_a[i + 2 + (k + 2) * lda] * b[2]; - sum2 += alpha * mat_a[i + 2 + (k + 3) * lda] * b[3]; - - sum3 += alpha * mat_a[i + 3 + k * lda] * b[0]; - sum3 += alpha * mat_a[i + 3 + (k + 1) * lda] * b[1]; - sum3 += alpha * mat_a[i + 3 + (k + 2) * lda] * b[2]; - sum3 += alpha * mat_a[i + 3 + (k + 3) * lda] * b[3]; + "vswp d1, d4\n" + "vswp d3, d6\n" + + "vst1.32 {q0}, [r12]!\n" + "vst1.32 {q1}, [r12]!\n" + "vst1.32 {q2}, [r12]!\n" + "vst1.32 {q3}, [r12]!\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "r10", "r12", "q0", "q1", "q2", "q3"); +#else + for (int tr = 0; tr < C4NUM; tr++) { + for (int tc = 0; tc < C4NUM; tc++) { + dst_c[tc * C4NUM + tr] = src_c[tr * lead + tc]; + } } - for (; k < K; ++k) { - float *b = &mat_b[j * ldb + k]; - sum0 += alpha * mat_a[i + (k * lda)] * b[0]; - sum1 += alpha * mat_a[i + 1 + (k * lda)] * b[0]; - sum2 += alpha * mat_a[i + 2 + (k * lda)] * b[0]; - sum3 += alpha * mat_a[i + 3 + (k * lda)] * b[0]; +#endif + } + for (; ci < col; ci++) { + const float *src_c = src_r + ci; + float *dst_c = dst_r + ci * C4NUM; + for (size_t i = 0; i < C4NUM; i++) { + dst_c[i] = src_c[i * lead]; } - mat_c[i * ldc + j] += sum0; - mat_c[(i + 1) * ldc + j] += sum1; - mat_c[(i + 2) * ldc + j] += sum2; - mat_c[(i + 3) * ldc + j] += sum3; } + src_r += C4NUM * col; + dst_r += C4NUM * col; } - // no more block of 4x4 - for (; i < M; ++i) { - for (j = 0; j < N; ++j) { - float sum = 0; - for (k = 0; k < K; ++k) { - sum += alpha * mat_a[i + k * lda] * mat_b[k + j * ldb]; - } - mat_c[i * ldc + j] += sum; + for (; ri < row; ri++) { + for (size_t i = 0; i < col; i++) { + dst_r[i * C4NUM] = src_r[i]; } + src_r += lead; + dst_r += 1; } + return; +} +#endif + +void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, + int ldb, float beta, float *mat_c, int ldc, float *workspace) { + GemmCb gcb; + gcb.atype = ActType_No; + gcb.ca = 0; + gcb.cb = 0; + gcb.bias = NULL; + GemmMatmulPlus(ta, tb, M, N, K, alpha, mat_a, lda, mat_b, ldb, beta, mat_c, ldc, workspace, &gcb); } -// mat_c = alpha*op( mat_a )*op( mat_b ) + beta*C -// M - number of rows of matrix a -// N - number of cols of matrix b -// K - number of cols of matrix a -// lda - fast dim of matrix a -// ldb - fast dim of matrix b -// ldc - fast dim of matrix c -void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, - int ldb, float beta, float *mat_c, int ldc) { - if (beta >= 0.f && beta <= 0.f) { - memset(mat_c, 0, M * N * sizeof(float)); - } else if (beta < 1.f || beta > 1.f) { - const int block_size = 4; - const int size = M * N; - int block_mod = size % block_size; - int 
block_c4 = size - block_mod; - int i; - for (i = 0; i < block_c4; i += block_size) { - float *c = &mat_c[i]; - c[0] *= beta; - c[1] *= beta; - c[2] *= beta; - c[3] *= beta; - } - for (; i < size; ++i) { - mat_c[i] *= beta; +void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, + int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *gcb) { +#ifdef ENABLE_ARM32 + const int num = C4NUM; +#else + const int num = C12NUM; +#endif + float *output = mat_c; + float *fworkspace = workspace; + int incremental = (beta < 0.f) || (beta > 0.f); + float *mat_a_input = (float *)mat_a; + float *mat_b_input = (float *)mat_b; + +#ifdef ENABLE_ARM32 + if (!gcb->ca) { + mat_a_input = fworkspace; + fworkspace += MatSize(M, K, num); + if (ta) { + RowMajor2Row4MajorStride(mat_a, mat_a_input, K, M, lda); + } else { + RowMajor2Col4MajorStride(mat_a, mat_a_input, M, K, lda); } } - if (transpose_a && transpose_b) { - gemm_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); - } else if (!transpose_a && !transpose_b) { - gemm_not_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); - } else if (!transpose_a && transpose_b) { - gemm_not_trana_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); - } else { - gemm_trana_not_tranb(M, N, K, alpha, mat_a, lda, mat_b, ldb, mat_c, ldc); +#else + if (!gcb->ca) { + mat_a_input = fworkspace; + fworkspace += MatSize(M, K, num); + if (ta) { + RowMajor2Row12MajorStride(mat_a, mat_a_input, K, M, lda); + } else { + RowMajor2Col12MajorStride(mat_a, mat_a_input, M, K, lda); + } + } +#endif + if (!gcb->cb) { + mat_b_input = fworkspace; + fworkspace += MatSize(N, K, C8NUM); + if (tb) { + RowMajor2Col8MajorStride(mat_b, mat_b_input, N, K, ldb); + } else { + RowMajor2Row8MajorStride(mat_b, mat_b_input, K, N, ldb); + } } + if (incremental) output = fworkspace; + MatMulOpt(mat_a_input, mat_b_input, output, gcb->bias, gcb->atype, K, M, N, ldc, OutType_Nhwc); + if (incremental) addv(output, mat_c, beta, M, N, ldc); + gcb->mat_a = mat_a_input; + gcb->mat_b = mat_b_input; } diff --git a/mindspore/lite/nnacl/fp32_grad/gemm.h b/mindspore/lite/nnacl/fp32_grad/gemm.h index ab7007675c..b1da9b8288 100644 --- a/mindspore/lite/nnacl/fp32_grad/gemm.h +++ b/mindspore/lite/nnacl/fp32_grad/gemm.h @@ -17,11 +17,26 @@ #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_ #define MINDSPORE_LITE_NNACL_FP32_GRAD_GEMM_H_ +#include +#include "nnacl/op_base.h" #ifdef __cplusplus extern "C" { #endif -void gemm(int transpose_a, int transpose_b, int M, int N, int K, float alpha, float *mat_a, int lda, float *mat_b, - int ldb, float beta, float *mat_c, int ldc); +typedef struct { + int ca; + int cb; + ActType atype; + float *bias; + float *mat_a; + float *mat_b; +} GemmCb; + +void GemmMatmulPlus(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, + int ldb, float beta, float *mat_c, int ldc, float *workspace, GemmCb *cb); +void GemmMatmul(int ta, int tb, int M, int N, int K, float alpha, const float *mat_a, int lda, const float *mat_b, + int ldb, float beta, float *mat_c, int ldc, float *workspace); +int MatSize(int row, int col, int round); +int MatSizeTotal(int row, int col, int deep, int inc); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32_grad/pack_ext.c b/mindspore/lite/nnacl/fp32_grad/pack_ext.c index 645ad8dc2d..1406656ecb 100644 --- a/mindspore/lite/nnacl/fp32_grad/pack_ext.c +++ b/mindspore/lite/nnacl/fp32_grad/pack_ext.c @@ -16,10 +16,11 
@@ #include #include "nnacl/fp32_grad/pack_ext.h" +#include "nnacl/pack.h" static int is_a_ge_zero_and_a_lt_b(int a, int b) { return (unsigned)(a) < (unsigned)(b); } -void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param) { +void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start) { const int pad_left = conv_param->pad_l_; const int pad_up = conv_param->pad_u_; @@ -35,42 +36,42 @@ void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param const int in_height = conv_param->input_h_; const int in_width = conv_param->input_w_; - const int output_h = conv_param->output_h_; const int output_w = conv_param->output_w_; const int channels = conv_param->input_channel_ / conv_param->group_; const int tot_channels = conv_param->input_channel_; - int kernel_row, kernel_col, output_rows, output_col; - - int row_stride_offset = 0; + int kernel_row, kernel_col; - for (output_rows = output_h; output_rows; output_rows--) { - int col_stride_offset = 0; - for (output_col = output_w; output_col; output_col--) { - for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { - int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; - for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { - int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; + for (int i = 0; i < rows; i++) { + int block_start = start + i; + int input_h = block_start / output_w * stride_h; + int input_w = block_start % output_w * stride_w; + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + int input_row = -pad_up + kernel_row * dilation_h + input_h; + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_col = -pad_left + kernel_col * dilation_w + input_w; - if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { - const int offset = (input_row * in_width + input_col) * tot_channels; - memcpy(data_col, in_data + offset, sizeof(float) * channels); - data_col += channels; - } else { - memset(data_col, 0, sizeof(float) * channels); - data_col += channels; - } + if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels; + memcpy(data_col, in_data + offset, sizeof(float) * channels); + data_col += channels; + } else { + memset(data_col, 0, sizeof(float) * channels); + data_col += channels; } } - col_stride_offset += stride_w; } - row_stride_offset += stride_h; } } +void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, + int real_cal_num, int block_index) { + rolling_im2col_hwc(input_data, packed_input, conv_param, real_cal_num, block_index); +} + // output matrix is (kernel_h*kernel_w*channels)X(output_h*output_w) -void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose) { +void im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, bool transpose) { const int pad_left = conv_param->pad_l_; const int pad_up = conv_param->pad_u_; @@ -150,7 +151,56 @@ void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param } } -void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param) { +void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start) { + const int pad_left = conv_param->pad_l_; + const int pad_up 
= conv_param->pad_u_; + + const int stride_h = conv_param->stride_h_; + const int stride_w = conv_param->stride_w_; + + const int dilation_h = conv_param->dilation_h_; + const int dilation_w = conv_param->dilation_w_; + + const int kernel_h = conv_param->kernel_h_; + const int kernel_w = conv_param->kernel_w_; + + const int in_height = conv_param->output_h_; + const int in_width = conv_param->output_w_; + + const int output_w = conv_param->input_w_; + + const int tot_channels = conv_param->output_channel_; + const int channels = tot_channels / conv_param->group_; + int channel, kernel_row, kernel_col, output_rows, output_col; + for (channel = 0; channel < channels; channel++) { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + for (output_rows = start; output_rows < start + rows; output_rows++) { + int input_row = -pad_up + kernel_row * dilation_h + output_rows * stride_h; + if (!is_a_ge_zero_and_a_lt_b(input_row, in_height)) { + for (output_col = output_w; output_col; output_col--) { + *(data_row++) = 0; + } + } else { + int input_col = -pad_left + kernel_col * dilation_w; + for (output_col = output_w; output_col; output_col--) { + if (is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + const int offset = (input_row * in_width + input_col) * tot_channels + channel; + *(data_row++) = in_data[offset]; + } else { + *(data_row++) = 0; + } + input_col += stride_w; + } + } + // input_row += stride_h; + } + } + } + } +} + +void col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param) { const int pad_left = conv_param->pad_l_; const int pad_up = conv_param->pad_u_; @@ -198,3 +248,52 @@ void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param row_stride_offset += stride_h; } } + +void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start) { + const int pad_left = conv_param->pad_l_; + const int pad_up = conv_param->pad_u_; + + const int stride_h = conv_param->stride_h_; + const int stride_w = conv_param->stride_w_; + + const int dilation_h = conv_param->dilation_h_; + const int dilation_w = conv_param->dilation_w_; + + const int kernel_h = conv_param->kernel_h_; + const int kernel_w = conv_param->kernel_w_; + + const int in_height = conv_param->input_h_; + const int in_width = conv_param->input_w_; + + const int output_w = conv_param->output_w_; + const int channels = conv_param->input_channel_ / conv_param->group_; + const int tot_channels = conv_param->input_channel_; + + int kernel_row, kernel_col; + + for (int r = 0; r < rows; r++) { + int output_col = (start + r) % output_w; + int output_row = (start + r) / output_w; + int row_stride_offset = output_row * stride_h; + int col_stride_offset = output_col * stride_w; + + // for (output_col = 0; output_col < output_w; output_col++) + { + for (kernel_row = 0; kernel_row < kernel_h; kernel_row++) { + int input_row = -pad_up + kernel_row * dilation_h + row_stride_offset; + for (kernel_col = 0; kernel_col < kernel_w; kernel_col++) { + int input_col = -pad_left + kernel_col * dilation_w + col_stride_offset; + + if (is_a_ge_zero_and_a_lt_b(input_row, in_height) && is_a_ge_zero_and_a_lt_b(input_col, in_width)) { + int offset = (input_row * in_width + input_col) * tot_channels; + float *data_im_ptr = &data_im[offset]; + for (int i = 0; i < channels; i++) { + data_im_ptr[i] += data_col[i]; + } + } + data_col += channels; + } + } + } + } +} diff --git 
a/mindspore/lite/nnacl/fp32_grad/pack_ext.h b/mindspore/lite/nnacl/fp32_grad/pack_ext.h index aa5f33faa7..c2095a7503 100644 --- a/mindspore/lite/nnacl/fp32_grad/pack_ext.h +++ b/mindspore/lite/nnacl/fp32_grad/pack_ext.h @@ -17,14 +17,18 @@ #ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_ #define MINDSPORE_LITE_NNACL_FP32_GRAD_PACK_EXT_H_ +#include #include "nnacl/conv_parameter.h" #ifdef __cplusplus extern "C" { #endif -void im2col_hwc(const float *in_data, float *data_col, ConvParameter *conv_param); -void im2row_hwc(const float *in_data, float *data_row, ConvParameter *conv_param, bool transpose); -void col2im_hwc(const float *data_col, float *data_im, ConvParameter *conv_param); + +void RollingIm2ColPackUnitFp32(const float *input_data, const ConvParameter *conv_param, float *packed_input, + int real_cal_num, int block_index); +void rolling_im2col_hwc(const float *in_data, float *data_col, const ConvParameter *conv_param, int rows, int start); +void rolling_im2row_hwc(const float *in_data, float *data_row, const ConvParameter *conv_param, int rows, int start); +void rolling_col2im_hwc(const float *data_col, float *data_im, const ConvParameter *conv_param, int rows, int start); #ifdef __cplusplus } #endif diff --git a/mindspore/lite/nnacl/fp32_grad/pooling_grad.c b/mindspore/lite/nnacl/fp32_grad/pooling_grad.c index ddf1b197cc..a5d702cf38 100644 --- a/mindspore/lite/nnacl/fp32_grad/pooling_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/pooling_grad.c @@ -14,6 +14,7 @@ * limitations under the License. */ #include +#include #include #include "nnacl/fp32_grad/pooling_grad.h" @@ -31,8 +32,7 @@ void AvgPoolingGrad(const float *input_ptr, float *output_ptr, PoolingParameter int output_h = pooling_param->output_h_; int output_batch = pooling_param->output_batch_; - for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0; - + memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float)); float kk = (float)(win_h * win_w); for (uint16_t ib = 0; ib < output_batch; ib++) { float *out = &output_ptr[(ib * in_h * in_w * channel)]; @@ -77,8 +77,7 @@ void MaxPoolingGrad(const float *input_ptr, const float *dx_ptr, const float *dy int output_h = pooling_param->output_h_; int output_batch = pooling_param->output_batch_; - for (int i = 0; i < in_h * in_w * channel * output_batch; i++) output_ptr[i] = 0.0; - + memset(output_ptr, 0, in_h * in_w * channel * output_batch * sizeof(float)); for (uint16_t ib = 0; ib < output_batch; ib++) { float *out = &output_ptr[(ib * in_h * in_w * channel)]; const float *inPtr = (const float *)(&input_ptr[(ib * in_h * in_w * channel)]); diff --git a/mindspore/lite/nnacl/fp32_grad/reduce_grad.c b/mindspore/lite/nnacl/fp32_grad/reduce_grad.c index 6963969817..d682342a7a 100644 --- a/mindspore/lite/nnacl/fp32_grad/reduce_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/reduce_grad.c @@ -15,50 +15,7 @@ */ #include #include "nnacl/fp32_grad/reduce_grad.h" - -static inline int NextIndex(const int num_dims, const int *dims, int *current) { - int carry = 1; - for (int idx = num_dims - 1; idx >= 0; --idx) { - int current_val = current[idx] + carry; - if (dims[idx] == current_val) { - current[idx] = 0; - } else { - current[idx] = current_val; - carry = 0; - break; - } - } - return (carry == 0); -} - -static inline size_t GetInputOffset(const int num_dims, const int *dims, const int *iter) { - size_t offset = 0; - for (int idx = 0; idx < num_dims; ++idx) { - offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]); - } - - return offset; -} - -static 
inline size_t GetOutputOffset(const int num_dims, const int *dims, const int *iter, const int num_axis, - const int *axes) { - size_t offset = 0; - for (int idx = 0; idx < num_dims; ++idx) { - // if we need to skip this axis - int is_axis = 0; - for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { - if (idx == axes[axis_idx]) { - is_axis = 1; - break; - } - } - - if (!is_axis) { - offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]); - } - } - return offset; -} +#include "nnacl/fp32_grad/utils.h" void ReduceMeanByAxes(const float *input_data, int *input_iter, const int *input_dims, int input_num_dims, const int *axes, int num_axes, float *output_data, const int *output_dims, int output_num_dims) { @@ -111,7 +68,7 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c return; } - for (int idx = 0; idx < num_outputs; ++idx) output[idx] = 0; // zero output + memset(output, 0, num_outputs * sizeof(float)); // zero output int input_iter[8] = {0}; int axes[5] = {0}; diff --git a/mindspore/lite/nnacl/fp32_grad/softmax_grad.c b/mindspore/lite/nnacl/fp32_grad/softmax_grad.c index 21bcc14188..c863e705dd 100644 --- a/mindspore/lite/nnacl/fp32_grad/softmax_grad.c +++ b/mindspore/lite/nnacl/fp32_grad/softmax_grad.c @@ -41,7 +41,6 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, const int M = input_shape[axis]; const int N = inner_size; - const int K = 1; for (int i = 0; i < outter_size; i++) { int outter_offset = i * dim; memset(sum_data, 0.0f, inner_size * sizeof(float)); @@ -52,7 +51,14 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, sum_data[k] += output_ptr[offset] * input_ptr[offset]; } } - gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N); + for (int k = 0; k < M; ++k) { + float a = -sum_mul[k]; + for (int j = 0; j < N; ++j) { + *(output_ptr + outter_offset + k * N + j) += a * sum_data[j]; + } + } + + // gemm(0, 0, M, N, K, -1, sum_mul, K, sum_data, N, 1, &output_ptr[outter_offset], N); } for (int i = 0; i < ele_size; i++) { diff --git a/mindspore/lite/nnacl/fp32_grad/utils.h b/mindspore/lite/nnacl/fp32_grad/utils.h new file mode 100644 index 0000000000..f7895aa917 --- /dev/null +++ b/mindspore/lite/nnacl/fp32_grad/utils.h @@ -0,0 +1,72 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ +#define MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ + +#include "nnacl/op_base.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline size_t GetInputOffset(int num_dims, const int *dims, const int *iter) { + size_t offset = 0; + for (int idx = 0; idx < num_dims; ++idx) { + offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]); + } + + return offset; +} + +static inline size_t GetOutputOffset(int num_dims, const int *dims, const int *iter, int num_axis, const int *axes) { + size_t offset = 0; + for (int idx = 0; idx < num_dims; ++idx) { + // if we need to skip this axis + int is_axis = 0; + for (int axis_idx = 0; axis_idx < num_axis; ++axis_idx) { + if (idx == axes[axis_idx]) { + is_axis = 1; + break; + } + } + + if (is_axis == 0) { + offset = offset * (size_t)(dims[idx]) + (size_t)(iter[idx]); + } + } + return offset; +} + +static inline int NextIndex(int num_dims, const int *dims, int *current) { + int carry = 1; + for (int idx = num_dims - 1; idx >= 0; --idx) { + int current_val = current[idx] + carry; + if (dims[idx] == current_val) { + current[idx] = 0; + } else { + current[idx] = current_val; + carry = 0; + break; + } + } + return (carry == 0); +} + +#ifdef __cplusplus +} +#endif + +#endif // MINDSPORE_LITE_NNACL_FP32_GRAD_UTILS_H_ diff --git a/mindspore/lite/schema/model.fbs b/mindspore/lite/schema/model.fbs index e2ccc1bc41..26c31f51ea 100644 --- a/mindspore/lite/schema/model.fbs +++ b/mindspore/lite/schema/model.fbs @@ -234,6 +234,9 @@ union PrimitiveType { BinaryCrossEntropyGrad, BinaryCrossEntropy, LpNormalization, + DropoutGrad, + MaximumGrad, + MinimumGrad } enum QuantType: int { diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index f3d7903333..5a768af145 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -224,6 +224,7 @@ table Conv2DGradFilter { dilateW: int; dilateH: int; hasBias: bool = false; + filter_shape: [int]; activationType: ActivationType = 0; } @@ -244,6 +245,7 @@ table Conv2DGradInput { dilateW: int; dilateH: int; hasBias: bool = false; + input_shape: [int]; activationType: ActivationType = 0; } @@ -264,6 +266,7 @@ table GroupConv2DGradInput { dilateW: int; dilateH: int; hasBias: bool = false; + input_shape: [int]; activationType: ActivationType = 0; } @@ -478,13 +481,10 @@ table DeConv2DGradFilter { } table BNGrad { - eps : float; - momentum: float; -} -table BNGradInput { - eps : float; + eps: float; momentum: float; } + table Scale { axis: int; activationType: ActivationType = 0; @@ -1087,6 +1087,16 @@ table FftReal { table FftImag { } +table DropoutGrad { + ratio : float = 0.5; +} + +table MaximumGrad { +} + +table MinimumGrad { +} + table NonMaxSuppression { centerPointBox : int = 0; } diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index 8defe1be0d..1e2877b514 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -95,13 +95,23 @@ class LiteKernel { std::string name() const { return this->name_; } - virtual void train() { train_mode_ = true; } + virtual int Train() { + this->train_mode_ = true; + return mindspore::lite::RET_OK; + } + + virtual bool IsTrain() const { return this->train_mode_; } + + virtual int Eval() { + this->train_mode_ = false; + return mindspore::lite::RET_OK; + } - virtual bool is_train() { return train_mode_; } + virtual bool IsEval() const { return !this->train_mode_; } - virtual void eval() { train_mode_ = false; } + virtual void SetTrainable(bool 
trainable = true) { this->trainable_ = trainable; } - virtual bool is_eval() { return !train_mode_; } + virtual bool IsTrainable() const { return this->trainable_; } void set_name(const std::string &name) { this->name_ = name; } @@ -179,6 +189,7 @@ class LiteKernel { std::vector in_kernels_; std::vector out_kernels_; bool train_mode_ = false; + bool trainable_ = false; // parameters of this Kernel are trained in Train Session bool is_model_output_ = false; size_t workspace_size_ = 0; static void *workspace_; diff --git a/mindspore/lite/src/ops/adam.cc b/mindspore/lite/src/ops/adam.cc index 33b09ae829..45c28be1a1 100644 --- a/mindspore/lite/src/ops/adam.cc +++ b/mindspore/lite/src/ops/adam.cc @@ -73,7 +73,7 @@ Registry AdamRegistry(schema::PrimitiveType_Adam, AdamCreator); int Adam::InferShape(std::vector inputs, std::vector outputs) { if (10 != inputs.size()) { - MS_LOG(ERROR) << "Adam should have at 10 input tensors"; + MS_LOG(ERROR) << "Adam should have 10 input tensors"; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/arithmetic_grad.cc b/mindspore/lite/src/ops/arithmetic_grad.cc index c4138444b2..58be418faa 100644 --- a/mindspore/lite/src/ops/arithmetic_grad.cc +++ b/mindspore/lite/src/ops/arithmetic_grad.cc @@ -42,11 +42,18 @@ int ArithmeticGrad::InferShape(std::vector inputs_, std::vector< MS_ASSERT(dx1 != nullptr); MS_ASSERT(dx2 != nullptr); + if ((Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) { + x1 = inputs_[0]; + x2 = inputs_[1]; + dy = inputs_[2]; + } + auto inShape0 = x1->shape(); auto inShape1 = x2->shape(); auto outShape = dy->shape(); - if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad)) { + if ((Type() == schema::PrimitiveType_AddGrad) || (Type() == schema::PrimitiveType_SubGrad) || + (Type() == schema::PrimitiveType_MaximumGrad) || (Type() == schema::PrimitiveType_MinimumGrad)) { ndim_ = outShape.size(); x1_shape_.resize(ndim_); x2_shape_.resize(ndim_); @@ -61,7 +68,6 @@ int ArithmeticGrad::InferShape(std::vector inputs_, std::vector< dy_shape_[i] = outShape[i]; } } else { - // if (inShape0.size() < inShape1.size()) if (dx1->ElementsNum() < dx2->ElementsNum()) { ndim_ = inShape1.size(); x1_shape_.resize(ndim_); diff --git a/mindspore/lite/src/ops/bias_grad.cc b/mindspore/lite/src/ops/bias_grad.cc index dfe57ec218..27cc25bca0 100644 --- a/mindspore/lite/src/ops/bias_grad.cc +++ b/mindspore/lite/src/ops/bias_grad.cc @@ -45,7 +45,12 @@ int BiasGrad::UnPackAttr(const Primitive &prim, const std::vector &i MS_LOG(ERROR) << "new primitiveT value failed"; return RET_ERROR; } - attr->axis = {0}; // GetValue>(prim.GetAttr("axis")); + if (prim.GetAttr("axis") == nullptr) { + MS_LOG(WARNING) << "get axis failed"; + attr->axis = {0}; + } else { + attr->axis = GetValue>(prim.GetAttr("axis")); + } this->primitive_->value.value = attr; if (this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "primitive value is nullptr"; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/bn_grad.cc b/mindspore/lite/src/ops/bn_grad.cc index 5770a3a9e9..c054911b51 100644 --- a/mindspore/lite/src/ops/bn_grad.cc +++ b/mindspore/lite/src/ops/bn_grad.cc @@ -42,13 +42,16 @@ int BNGrad::UnPackAttr(const Primitive &prim, const std::vector &inp return RET_ERROR; } if (this->primitive_->value.value == nullptr) { - auto attr = new (std::nothrow) schema::BNGradInputT(); + auto attr = new (std::nothrow) schema::BNGradT(); if (attr == nullptr) { MS_LOG(ERROR) << "new primitiveT value failed"; return RET_ERROR; } - attr->momentum =
GetValue(prim.GetAttr("momentum")); - // FusedBatchNormGrad dows not get this attribute + attr->momentum = 0.1f; + if (prim.GetAttr("momentum") != nullptr) { + attr->momentum = GetValue(prim.GetAttr("momentum")); + } + attr->eps = 1e-5; if (prim.GetAttr("epsilon") != nullptr) { attr->eps = GetValue(prim.GetAttr("epsilon")); } @@ -75,6 +78,9 @@ int BNGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers: return RET_OK; } +PrimitiveC *BNGradCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC(primitive); } +Registry BNGradRegistry(schema::PrimitiveType_BNGrad, BNGradCreator); + float BNGrad::GetEps() const { return this->primitive_->value_as_BNGrad()->eps(); } float BNGrad::GetMomentum() const { return this->primitive_->value_as_BNGrad()->momentum(); } #endif @@ -90,6 +96,10 @@ int BNGrad::InferShape(std::vector inputs, std::vectorshape().size() != 4) { + MS_LOG(ERROR) << "Grad Fused batchnorm only supports NHWC input!"; + } + outputs[0]->set_shape(in->shape()); outputs[1]->set_shape(scale->shape()); outputs[2]->set_shape(scale->shape()); diff --git a/mindspore/lite/src/ops/conv2d_grad_filter.cc b/mindspore/lite/src/ops/conv2d_grad_filter.cc index 33a161bcb3..2199c9f0b4 100644 --- a/mindspore/lite/src/ops/conv2d_grad_filter.cc +++ b/mindspore/lite/src/ops/conv2d_grad_filter.cc @@ -38,6 +38,7 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value.AsCon int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value.AsConv2DGradFilter()->dilateW; } int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value.AsConv2DGradFilter()->dilateH; } bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value.AsConv2DGradFilter()->hasBias; } + int Conv2DGradFilter::GetActivationType() const { return this->primitive_->value.AsConv2DGradFilter()->activationType; } void Conv2DGradFilter::SetFormat(int format) { @@ -66,6 +67,9 @@ void Conv2DGradFilter::SetPadRight(int pad_right) { void Conv2DGradFilter::SetDilateW(int dilate_w) { this->primitive_->value.AsConv2DGradFilter()->dilateW = dilate_w; } void Conv2DGradFilter::SetDilateH(int dilate_h) { this->primitive_->value.AsConv2DGradFilter()->dilateH = dilate_h; } void Conv2DGradFilter::SetHasBias(bool has_bias) { this->primitive_->value.AsConv2DGradFilter()->hasBias = has_bias; } +std::vector Conv2DGradFilter::GetFilterShape() const { + return this->primitive_->value.AsConv2DGradFilter()->filter_shape; +} void Conv2DGradFilter::SetActivationType(int activation_type) { this->primitive_->value.AsConv2DGradFilter()->activationType = (schema::ActivationType)activation_type; } @@ -134,6 +138,28 @@ int Conv2DGradFilter::UnPackAttr(const Primitive &prim, const std::vectoractivationType = schema::ActivationType_NO_ACTIVATION; } + if (inputs.size() >= kAnfPopulaterThree) { + auto filter_shape = inputs[kAnfPopulaterTwo]; + MS_ASSERT(filter_shape != nullptr); + if (filter_shape->isa()) { + auto valueNode = filter_shape->cast(); + MS_ASSERT(valueNode != nullptr); + auto value = valueNode->value(); + MS_ASSERT(value != nullptr); + if (value->isa()) { + auto valTuplPtr = dyn_cast(value); + MS_ASSERT(valTuplPtr != nullptr); + const int nchw2nhwc[] = {0, 3, 1, 2}; + attr->filter_shape.resize(valTuplPtr->size()); + for (size_t i = 0; i < valTuplPtr->size(); i++) { + auto elem = dyn_cast((*valTuplPtr)[i]); + MS_ASSERT(elem != nullptr); + attr->filter_shape[nchw2nhwc[i]] = elem->value(); + } + } + } + } + this->primitive_->value.value = attr; if
(this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "primitive value is nullptr"; @@ -151,10 +177,16 @@ int Conv2DGradFilter::UnPackToFlatBuilder(const schema::Primitive *primitive, fl MS_LOG(ERROR) << "value_as_Conv2DGradFilter return nullptr"; return RET_ERROR; } - auto val_offset = schema::CreateConv2DGradFilter( + std::vector filter_shape; + if (attr->filter_shape() != nullptr) { + for (int i = 0; i < static_cast(attr->filter_shape()->size()); i++) { + filter_shape.push_back(attr->filter_shape()->data()[i]); + } + } + auto val_offset = schema::CreateConv2DGradFilterDirect( *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), - attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); + attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &filter_shape, attr->activationType()); auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradFilter, val_offset.o); fbb->Finish(prim_offset); return RET_OK; @@ -175,6 +207,10 @@ int Conv2DGradFilter::GetPadRight() const { return this->primitive_->value_as_Co int Conv2DGradFilter::GetDilateW() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateW(); } int Conv2DGradFilter::GetDilateH() const { return this->primitive_->value_as_Conv2DGradFilter()->dilateH(); } bool Conv2DGradFilter::GetHasBias() const { return this->primitive_->value_as_Conv2DGradFilter()->hasBias(); } +std::vector Conv2DGradFilter::GetFilterShape() const { + auto fb_vector = this->primitive_->value_as_Conv2DGradFilter()->filter_shape(); + return std::vector(fb_vector->begin(), fb_vector->end()); +} int Conv2DGradFilter::GetActivationType() const { return this->primitive_->value_as_Conv2DGradFilter()->activationType(); } @@ -186,41 +222,22 @@ Registry conv2DGradFilterRegistry(schema::PrimitiveType_Conv2DGradFilter, Conv2D #endif int Conv2DGradFilter::InferShape(std::vector inputs, std::vector outputs) { - if (3 != inputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad Filter should have 3 inputs"; + if (2 != inputs.size()) { + MS_LOG(ERROR) << "Conv2d Grad Filter should have 2 inputs, but it got " << inputs.size(); return RET_ERROR; } if (1 != outputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad Filter should have one output"; + MS_LOG(ERROR) << "Conv2d Grad Filter should have one output but it got " << outputs.size(); return RET_ERROR; } auto *in0 = inputs.at(0); - auto *in = inputs.at(2); MS_ASSERT(in0 != nullptr); - MS_ASSERT(in != nullptr); - - std::vector output_shape; - int *out_shape = reinterpret_cast(in->MutableData()); - int new_size = in->ElementsNum(); - if (in0->GetFormat() == in->GetFormat()) { - for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); - } else { - if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { - output_shape.push_back(out_shape[0]); - output_shape.push_back(out_shape[2]); - output_shape.push_back(out_shape[3]); - output_shape.push_back(out_shape[1]); - } else { - MS_LOG(ERROR) << "Shape covnert is not supported"; - return RET_ERROR; - } - } auto *out = outputs.at(0); MS_ASSERT(out != nullptr); - out->set_shape(output_shape); + out->set_shape(GetFilterShape()); out->set_data_type(in0->data_type()); out->SetFormat(in0->GetFormat()); diff --git a/mindspore/lite/src/ops/conv2d_grad_filter.h b/mindspore/lite/src/ops/conv2d_grad_filter.h index 
93cd81c245..ec56829e51 100644 --- a/mindspore/lite/src/ops/conv2d_grad_filter.h +++ b/mindspore/lite/src/ops/conv2d_grad_filter.h @@ -72,6 +72,7 @@ class Conv2DGradFilter : public PrimitiveC { int GetDilateH() const; bool GetHasBias() const; int GetActivationType() const; + std::vector GetFilterShape() const; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/conv2d_grad_input.cc b/mindspore/lite/src/ops/conv2d_grad_input.cc index 562ae6a326..37889e5bb7 100644 --- a/mindspore/lite/src/ops/conv2d_grad_input.cc +++ b/mindspore/lite/src/ops/conv2d_grad_input.cc @@ -39,6 +39,9 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value.AsConv int Conv2DGradInput::GetDilateW() const { return this->primitive_->value.AsConv2DGradInput()->dilateW; } int Conv2DGradInput::GetDilateH() const { return this->primitive_->value.AsConv2DGradInput()->dilateH; } bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value.AsConv2DGradInput()->hasBias; } +std::vector Conv2DGradInput::GetInputShape() const { + return this->primitive_->value.AsConv2DGradInput()->input_shape; +} int Conv2DGradInput::GetActivationType() const { return this->primitive_->value.AsConv2DGradInput()->activationType; } void Conv2DGradInput::SetFormat(int format) { @@ -137,6 +140,27 @@ int Conv2DGradInput::UnPackAttr(const Primitive &prim, const std::vectoractivationType = schema::ActivationType_NO_ACTIVATION; } + if (inputs.size() >= kAnfPopulaterThree) { + auto input_shape = inputs[kAnfPopulaterTwo]; + MS_ASSERT(input_shape != nullptr); + if (input_shape->isa()) { + auto valueNode = input_shape->cast(); + MS_ASSERT(valueNode != nullptr); + auto value = valueNode->value(); + MS_ASSERT(value != nullptr); + if (value->isa()) { + auto valTuplPtr = dyn_cast(value); + MS_ASSERT(valTuplPtr != nullptr); + const int nchw2nhwc[] = {0, 3, 1, 2}; + attr->input_shape.resize(valTuplPtr->size()); + for (size_t i = 0; i < valTuplPtr->size(); i++) { + auto elem = dyn_cast((*valTuplPtr)[i]); + MS_ASSERT(elem != nullptr); + attr->input_shape[nchw2nhwc[i]] = elem->value(); + } + } + } + } this->primitive_->value.value = attr; if (this->primitive_->value.value == nullptr) { MS_LOG(ERROR) << "primitive value is nullptr"; @@ -154,10 +178,16 @@ int Conv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive, fla MS_LOG(ERROR) << "value_as_Conv2DGradInput return nullptr"; return RET_ERROR; } - auto val_offset = schema::CreateConv2DGradInput( + std::vector input_shape; + if (attr->input_shape() != nullptr) { + for (int i = 0; i < static_cast(attr->input_shape()->size()); i++) { + input_shape.push_back(attr->input_shape()->data()[i]); + } + } + auto val_offset = schema::CreateConv2DGradInputDirect( *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), - attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); + attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType()); auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_Conv2DGradInput, val_offset.o); fbb->Finish(prim_offset); return RET_OK; @@ -178,6 +208,10 @@ int Conv2DGradInput::GetPadRight() const { return this->primitive_->value_as_Con int Conv2DGradInput::GetDilateW() const { return this->primitive_->value_as_Conv2DGradInput()->dilateW(); } int Conv2DGradInput::GetDilateH() 
const { return this->primitive_->value_as_Conv2DGradInput()->dilateH(); } bool Conv2DGradInput::GetHasBias() const { return this->primitive_->value_as_Conv2DGradInput()->hasBias(); } +std::vector Conv2DGradInput::GetInputShape() const { + auto fb_vector = this->primitive_->value_as_Conv2DGradInput()->input_shape(); + return std::vector(fb_vector->begin(), fb_vector->end()); +} int Conv2DGradInput::GetActivationType() const { return this->primitive_->value_as_Conv2DGradInput()->activationType(); } @@ -189,40 +223,21 @@ Registry Conv2DGradInputRegistry(schema::PrimitiveType_Conv2DGradInput, Conv2DGr #endif int Conv2DGradInput::InferShape(std::vector inputs, std::vector outputs) { - if (3 != inputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs"; + if (2 != inputs.size()) { + MS_LOG(ERROR) << "Conv2d Grad Input should have 2 inputs"; return RET_ERROR; } if (1 != outputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad input should have one output"; + MS_LOG(ERROR) << "Conv2d Grad Input should have one output"; return RET_ERROR; } auto *in0 = inputs.at(0); - auto *in = inputs.at(2); MS_ASSERT(in0 != nullptr); - MS_ASSERT(in != nullptr); - - std::vector output_shape; - int *out_shape = reinterpret_cast(in->MutableData()); - int new_size = in->ElementsNum(); - if (in0->GetFormat() == in->GetFormat()) { - for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); - } else { - if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { - output_shape.push_back(out_shape[0]); - output_shape.push_back(out_shape[2]); - output_shape.push_back(out_shape[3]); - output_shape.push_back(out_shape[1]); - } else { - MS_LOG(ERROR) << "Shape covnert is not supported"; - return RET_ERROR; - } - } auto *out = outputs.at(0); MS_ASSERT(out != nullptr); - out->set_shape(output_shape); + out->set_shape(GetInputShape()); out->set_data_type(in0->data_type()); out->SetFormat(in0->GetFormat()); diff --git a/mindspore/lite/src/ops/conv2d_grad_input.h b/mindspore/lite/src/ops/conv2d_grad_input.h index 73e8f39402..53816be7fd 100644 --- a/mindspore/lite/src/ops/conv2d_grad_input.h +++ b/mindspore/lite/src/ops/conv2d_grad_input.h @@ -72,6 +72,7 @@ class Conv2DGradInput : public PrimitiveC { int GetDilateH() const; bool GetHasBias() const; int GetActivationType() const; + std::vector GetInputShape() const; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/dropout.cc b/mindspore/lite/src/ops/dropout.cc index 4470f55e9b..dd74b24506 100644 --- a/mindspore/lite/src/ops/dropout.cc +++ b/mindspore/lite/src/ops/dropout.cc @@ -27,6 +27,37 @@ float Dropout::GetRatio() const { return this->primitive_->value.AsDropout()->ra void Dropout::SetRatio(float ratio) { this->primitive_->value.AsDropout()->ratio = ratio; } +int Dropout::UnPackAttr(const Primitive &prim, const std::vector &inputs) { + if (this->primitive_ == nullptr) { + this->primitive_ = new (std::nothrow) schema::PrimitiveT; + if (this->primitive_ == nullptr) { + MS_LOG(ERROR) << "new primitiveT failed"; + return RET_ERROR; + } + this->primitive_->value.type = schema::PrimitiveType_Dropout; + } + if (this->primitive_->value.type != schema::PrimitiveType_Dropout) { + MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; + return RET_ERROR; + } + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::DropoutT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + if
(prim.GetAttr("keep_prob") != nullptr) { + attr->ratio = GetValue(prim.GetAttr("keep_prob")); + } + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } + } + return RET_OK; +} + #else int Dropout::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { MS_ASSERT(nullptr != primitive); @@ -46,5 +77,29 @@ float Dropout::GetRatio() const { return this->primitive_->value_as_Dropout()->r PrimitiveC *DropoutCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC(primitive); } Registry DropoutRegistry(schema::PrimitiveType_Dropout, DropoutCreator); #endif +int Dropout::InferShape(std::vector inputs_, std::vector outputs_) { + MS_ASSERT(this->primitive_ != nullptr); + auto input = inputs_.front(); + MS_ASSERT(input != nullptr); + auto output0 = outputs_.front(); + MS_ASSERT(output0 != nullptr); + if (!GetInferFlag()) { + return RET_OK; + } + output0->set_shape(input->shape()); + output0->set_data_type(input->data_type()); + output0->SetFormat(input->GetFormat()); + + if (outputs_.size() > 1) { + auto output1 = outputs_[1]; + MS_ASSERT(output1 != nullptr); + output1->set_shape(input->shape()); + output1->set_data_type(input->data_type()); + output1->SetFormat(input->GetFormat()); + } + + return RET_OK; +} + } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/ops/dropout.h b/mindspore/lite/src/ops/dropout.h index 5bb645f765..21310974b6 100644 --- a/mindspore/lite/src/ops/dropout.h +++ b/mindspore/lite/src/ops/dropout.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ -#define LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ +#ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ +#define MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ #include #include @@ -32,13 +32,16 @@ class Dropout : public PrimitiveC { MS_DECLARE_PARENT(Dropout, PrimitiveC); explicit Dropout(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} void SetRatio(float ratio); + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; #else int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; #endif float GetRatio() const; + int InferShape(std::vector inputs_, std::vector outputs_) override; }; + } // namespace lite } // namespace mindspore -#endif // LITE_MINDSPORE_LITE_C_OPS_DROPOUT_H_ +#endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_H_ diff --git a/mindspore/lite/src/ops/dropout_grad.cc b/mindspore/lite/src/ops/dropout_grad.cc new file mode 100644 index 0000000000..99b348f005 --- /dev/null +++ b/mindspore/lite/src/ops/dropout_grad.cc @@ -0,0 +1,100 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
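A note on the Dropout changes above: InferShape now gives Dropout a second output with the same shape and type as its input. In the usual inverted-dropout formulation that second tensor is the sampled mask, which the new DropoutGrad op (below) consumes; the kernels themselves are not part of this diff, so the following is only a sketch of that assumed contract:

// Sketch of the assumed Dropout/DropoutGrad contract (not code from this patch):
// output 0 is the scaled activation, output 1 is the mask reused by the grad op.
#include <cstddef>
#include <random>
#include <vector>

void DropoutForward(const std::vector<float> &x, float keep_prob,
                    std::vector<float> *y, std::vector<float> *mask) {
  std::mt19937 rng(std::random_device{}());
  std::bernoulli_distribution keep(keep_prob);
  for (size_t i = 0; i < x.size(); ++i) {
    (*mask)[i] = keep(rng) ? 1.0f / keep_prob : 0.0f;  // inverted-dropout scaling
    (*y)[i] = x[i] * (*mask)[i];
  }
}

void DropoutBackward(const std::vector<float> &dy, const std::vector<float> &mask,
                     std::vector<float> *dx) {
  for (size_t i = 0; i < dy.size(); ++i) {
    (*dx)[i] = dy[i] * mask[i];  // gradient flows only through kept units
  }
}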
+ */ + +#include "src/ops/dropout_grad.h" + +#ifndef PRIMITIVE_WRITEABLE +#include "src/ops/ops_register.h" +#endif + +namespace mindspore { +namespace lite { +#ifdef PRIMITIVE_WRITEABLE +float DropoutGrad::GetRatio() const { return this->primitive_->value.AsDropoutGrad()->ratio; } + +void DropoutGrad::SetRatio(float ratio) { this->primitive_->value.AsDropoutGrad()->ratio = ratio; } + +int DropoutGrad::UnPackAttr(const Primitive &prim, const std::vector &inputs) { + if (this->primitive_ == nullptr) { + this->primitive_ = new (std::nothrow) schema::PrimitiveT; + if (this->primitive_ == nullptr) { + MS_LOG(ERROR) << "new primitiveT failed"; + return RET_ERROR; + } + this->primitive_->value.type = schema::PrimitiveType_DropoutGrad; + } + if (this->primitive_->value.type != schema::PrimitiveType_DropoutGrad) { + MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; + return RET_ERROR; + } + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::DropoutGradT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + if (prim.GetAttr("keep_prob") != nullptr) { + attr->ratio = GetValue(prim.GetAttr("keep_prob")); + } + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } + } + return RET_OK; +} +#else +int DropoutGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { + MS_ASSERT(nullptr != primitive); + MS_ASSERT(nullptr != fbb); + auto attr = primitive->value_as_DropoutGrad(); + if (attr == nullptr) { + MS_LOG(ERROR) << "value_as_DropoutGrad return nullptr"; + return RET_ERROR; + } + auto val_offset = schema::CreateDropoutGrad(*fbb, attr->ratio()); + auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_DropoutGrad, val_offset.o); + fbb->Finish(prim_offset); + return RET_OK; +} +float DropoutGrad::GetRatio() const { return this->primitive_->value_as_DropoutGrad()->ratio(); } + +PrimitiveC *DropoutGradCreator(const schema::Primitive *primitive) { + return PrimitiveC::NewPrimitiveC(primitive); +} +Registry DropoutGradRegistry(schema::PrimitiveType_DropoutGrad, DropoutGradCreator); + +#endif +int DropoutGrad::InferShape(std::vector inputs_, std::vector outputs_) { + MS_ASSERT(this->primitive_ != nullptr); + MS_ASSERT(inputs_.size() == 2); + auto input = inputs_.front(); + MS_ASSERT(input != nullptr); + auto output = outputs_.front(); + MS_ASSERT(output != nullptr); + if (!GetInferFlag()) { + return RET_OK; + } + output->set_shape(input->shape()); + output->set_data_type(input->data_type()); + output->SetFormat(input->GetFormat()); + + return RET_OK; +} + +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/dropout_grad.h b/mindspore/lite/src/ops/dropout_grad.h new file mode 100644 index 0000000000..c0d0d11c29 --- /dev/null +++ b/mindspore/lite/src/ops/dropout_grad.h @@ -0,0 +1,47 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
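The Creator/Registry pairs appearing throughout this patch (DropoutGradRegistry above, MaximumGradRegistry and MinimumGradRegistry below) use static-initialization-time registration: constructing a file-scope Registry object inserts the creator into a global factory keyed by primitive type, so PrimitiveC::NewPrimitiveC can dispatch without a central switch in the non-writeable build. A minimal sketch of the idiom with hypothetical names (the real implementation lives in src/ops/ops_register.h, which this diff does not touch):

// Hypothetical reduction of the Registry idiom used above.
#include <functional>
#include <map>

using Creator = std::function<void *(const void *primitive)>;

static std::map<int, Creator> &Factory() {
  static std::map<int, Creator> factory;  // constructed on first use
  return factory;
}

struct Registry {
  Registry(int type, Creator creator) { Factory()[type] = creator; }  // runs during static init
};

// A file-scope object registers its creator as a side effect before main();
// the real creator would construct the op, e.g. new DropoutGrad(primitive).
static Registry g_example_registry(/* PrimitiveType_DropoutGrad */ 1,
                                   [](const void *prim) -> void * { return nullptr; });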
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ +#define MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ + +#include +#include +#include +#include "src/ops/primitive_c.h" + +namespace mindspore { +namespace lite { +class DropoutGrad : public PrimitiveC { + public: +#ifdef PRIMITIVE_WRITEABLE + MS_DECLARE_PARENT(DropoutGrad, PrimitiveC); + DropoutGrad() = default; + explicit DropoutGrad(schema::PrimitiveT *primitive) : PrimitiveC(primitive) {} + void SetRatio(float ratio); + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; + +#else + DropoutGrad() = default; + + int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; +#endif + float GetRatio() const; + int InferShape(std::vector inputs_, std::vector outputs_) override; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_SRC_OPS_DROPOUT_GRAD_H_ diff --git a/mindspore/lite/src/ops/group_conv2d_grad_input.cc b/mindspore/lite/src/ops/group_conv2d_grad_input.cc index 1ce21b2506..673d89a8cd 100644 --- a/mindspore/lite/src/ops/group_conv2d_grad_input.cc +++ b/mindspore/lite/src/ops/group_conv2d_grad_input.cc @@ -39,6 +39,9 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value.A int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateW; } int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value.AsGroupConv2DGradInput()->dilateH; } bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value.AsGroupConv2DGradInput()->hasBias; } +std::vector GroupConv2DGradInput::GetInputShape() const { + return this->primitive_->value.AsGroupConv2DGradInput()->input_shape; +} int GroupConv2DGradInput::GetActivationType() const { return this->primitive_->value.AsGroupConv2DGradInput()->activationType; } @@ -99,10 +102,16 @@ int GroupConv2DGradInput::UnPackToFlatBuilder(const schema::Primitive *primitive MS_LOG(ERROR) << "value_as_GroupConv2DGradInput return nullptr"; return RET_ERROR; } - auto val_offset = schema::CreateGroupConv2DGradInput( + std::vector input_shape; + if (attr->input_shape() != nullptr) { + for (int i = 0; i < static_cast(attr->input_shape()->size()); i++) { + input_shape.push_back(attr->input_shape()->data()[i]); + } + } + auto val_offset = schema::CreateGroupConv2DGradInputDirect( *fbb, attr->format(), attr->group(), attr->channelIn(), attr->channelOut(), attr->kernelW(), attr->kernelH(), attr->strideW(), attr->strideH(), attr->padMode(), attr->padUp(), attr->padDown(), attr->padLeft(), - attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), attr->activationType()); + attr->padRight(), attr->dilateW(), attr->dilateH(), attr->hasBias(), &input_shape, attr->activationType()); auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_GroupConv2DGradInput, val_offset.o); fbb->Finish(prim_offset); return RET_OK; @@ -127,51 +136,38 @@ int GroupConv2DGradInput::GetPadRight() const { return this->primitive_->value_a int GroupConv2DGradInput::GetDilateW() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateW(); } int GroupConv2DGradInput::GetDilateH() const { return this->primitive_->value_as_GroupConv2DGradInput()->dilateH(); } bool GroupConv2DGradInput::GetHasBias() const { return this->primitive_->value_as_GroupConv2DGradInput()->hasBias(); } +std::vector 
GroupConv2DGradInput::GetInputShape() const { + auto fb_vector = this->primitive_->value_as_GroupConv2DGradInput()->input_shape(); + return std::vector(fb_vector->begin(), fb_vector->end()); +} int GroupConv2DGradInput::GetActivationType() const { return this->primitive_->value_as_GroupConv2DGradInput()->activationType(); } - PrimitiveC *GroupConv2DGradInputCreator(const schema::Primitive *primitive) { return PrimitiveC::NewPrimitiveC(primitive); } Registry GroupConv2DGradInputRegistry(schema::PrimitiveType_GroupConv2DGradInput, GroupConv2DGradInputCreator); + #endif int GroupConv2DGradInput::InferShape(std::vector inputs, std::vector outputs) { - if (3 != inputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad Input should have 3 inputs"; + if (2 != inputs.size()) { + MS_LOG(ERROR) << "Group Conv2d Grad Input should have 2 inputs"; return RET_ERROR; } if (1 != outputs.size()) { - MS_LOG(ERROR) << "Conv2d Grad input should have one output"; + MS_LOG(ERROR) << "Group Conv2d Grad Input should have one output"; return RET_ERROR; } auto *in0 = inputs.at(0); - auto *in = inputs.at(2); - MS_ASSERT(in0 != nullptr); - MS_ASSERT(in != nullptr); - std::vector output_shape; - int *out_shape = reinterpret_cast(in->MutableData()); - int new_size = in->ElementsNum(); - if (in0->GetFormat() == in->GetFormat()) { - for (int i = 0; i < new_size; i++) output_shape.push_back(out_shape[i]); - } else { - if ((in0->GetFormat() == schema::Format_NHWC) && (in->GetFormat() == schema::Format_NCHW)) { - output_shape.push_back(out_shape[0]); - output_shape.push_back(out_shape[2]); - output_shape.push_back(out_shape[3]); - output_shape.push_back(out_shape[1]); - } else { - MS_LOG(ERROR) << "Shape covnert is not supported"; - return RET_ERROR; - } - } + MS_ASSERT(in0 != nullptr); auto *out = outputs.at(0); MS_ASSERT(out != nullptr); - out->set_shape(output_shape); + out->set_shape(GetInputShape()); + out->set_data_type(in0->data_type()); out->SetFormat(in0->GetFormat()); diff --git a/mindspore/lite/src/ops/group_conv2d_grad_input.h b/mindspore/lite/src/ops/group_conv2d_grad_input.h index a005ced74d..a53af0f316 100644 --- a/mindspore/lite/src/ops/group_conv2d_grad_input.h +++ b/mindspore/lite/src/ops/group_conv2d_grad_input.h @@ -70,6 +70,7 @@ class GroupConv2DGradInput : public PrimitiveC { int GetDilateW() const; int GetDilateH() const; bool GetHasBias() const; + std::vector GetInputShape() const; int GetActivationType() const; }; } // namespace lite diff --git a/mindspore/lite/src/ops/maximum.h b/mindspore/lite/src/ops/maximum.h index 9b85e202c3..052088ebab 100644 --- a/mindspore/lite/src/ops/maximum.h +++ b/mindspore/lite/src/ops/maximum.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ -#define LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ +#ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ +#define MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ #include #include @@ -41,4 +41,4 @@ class Maximum : public Arithmetic { } // namespace lite } // namespace mindspore -#endif // LITE_MINDSPORE_LITE_C_OPS_MAXIMUM_H_ +#endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_H_ diff --git a/mindspore/lite/src/ops/maximum_grad.cc b/mindspore/lite/src/ops/maximum_grad.cc new file mode 100644 index 0000000000..9644957f55 --- /dev/null +++ b/mindspore/lite/src/ops/maximum_grad.cc @@ -0,0 +1,124 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
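The InferShape rewrites for Conv2DGradFilter, Conv2DGradInput, and GroupConv2DGradInput all follow the same pattern: the op no longer takes a third runtime tensor whose data holds the target shape, because the converter now records that shape (already permuted to NHWC) in the new filter_shape/input_shape schema attribute. Shape inference therefore needs no resident tensor data and no NCHW-to-NHWC conversion at run time, which is also why the input-count checks above changed from 3 to 2. Roughly, the new logic reduces to this sketch (simplified types, not the patch's code):

// Simplified view of attribute-driven shape inference (illustrative only).
#include <vector>

int InferGradShapeFromAttr(const std::vector<int> &attr_shape,  // e.g. GetInputShape()
                           std::vector<int> *out_shape) {
  if (attr_shape.empty()) {
    return -1;  // converter did not record a shape; nothing to infer from
  }
  *out_shape = attr_shape;  // no dependency on another tensor's data being resident
  return 0;
}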
* You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "include/errorcode.h" +#include "src/ops/maximum_grad.h" +#include "src/common/log_adapter.h" +#ifdef PRIMITIVE_WRITEABLE +#include +#include "tools/converter/quantizer/quantize_util.h" +#endif + +#ifndef PRIMITIVE_WRITEABLE +#include "src/ops/ops_register.h" +#endif + +namespace mindspore { +namespace lite { +#ifdef PRIMITIVE_WRITEABLE +int MaximumGrad::UnPackAttr(const Primitive &prim, const std::vector &inputs) { + if (this->primitive_ == nullptr) { + this->primitive_ = new (std::nothrow) schema::PrimitiveT; + if (this->primitive_ == nullptr) { + MS_LOG(ERROR) << "new primitiveT failed"; + return RET_ERROR; + } + this->primitive_->value.type = schema::PrimitiveType_MaximumGrad; + } + if (this->primitive_->value.type != schema::PrimitiveType_MaximumGrad) { + MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; + return RET_ERROR; + } + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::MaximumGradT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } + } + return RET_OK; +} +#else +int MaximumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { + MS_ASSERT(nullptr != primitive); + MS_ASSERT(nullptr != fbb); + auto val_offset = schema::CreateMaximumGrad(*fbb); + auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MaximumGrad, val_offset.o); + fbb->Finish(prim_offset); + return RET_OK; +} +PrimitiveC *MaximumGradCreator(const schema::Primitive *primitive) { + return PrimitiveC::NewPrimitiveC(primitive); +} +Registry MaximumGradRegistry(schema::PrimitiveType_MaximumGrad, MaximumGradCreator); + +#endif +int MaximumGrad::InferShape(std::vector inputs_, std::vector outputs_) { + if (inputs_.size() != 3) { + MS_LOG(ERROR) << "The number of inputs must be 3"; + return RET_ERROR; + } + if (outputs_.size() != 2) { + MS_LOG(ERROR) << "The number of outputs must be 2"; + return RET_ERROR; + } + + auto x1 = inputs_[0]; + auto x2 = inputs_[1]; + auto dy = inputs_[2]; + auto dx1 = outputs_[0]; + auto dx2 = outputs_[1]; + + MS_ASSERT(dy != nullptr); + MS_ASSERT(x1 != nullptr); + MS_ASSERT(x2 != nullptr); + MS_ASSERT(dx1 != nullptr); + MS_ASSERT(dx2 != nullptr); + if (!GetInferFlag()) { + return RET_OK; + } + + auto inShape0 = x1->shape(); + auto inShape1 = x2->shape(); + auto outShape = dy->shape(); + + ndim_ = outShape.size(); + x1_shape_.resize(ndim_); + x2_shape_.resize(ndim_); + dy_shape_.resize(ndim_); + auto fillDimNum0 = outShape.size() - inShape0.size(); + auto fillDimNum1 = outShape.size() - inShape1.size(); + int j0 = 0; + int j1 = 0; + for (unsigned int i = 0; i < outShape.size(); i++) { + x1_shape_[i] = (i < fillDimNum0) ? 1 : inShape0[j0++]; + x2_shape_[i] = (i < fillDimNum1) ?
1 : inShape1[j1++]; + dy_shape_[i] = outShape[i]; + } + + dx1->set_shape(x1->shape()); + dx2->set_shape(x2->shape()); + dx1->set_data_type(dy->data_type()); + dx2->set_data_type(dy->data_type()); + return RET_OK; +} +} // namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/maximum_grad.h b/mindspore/lite/src/ops/maximum_grad.h new file mode 100644 index 0000000000..10e73b485a --- /dev/null +++ b/mindspore/lite/src/ops/maximum_grad.h @@ -0,0 +1,46 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ +#define MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ + +#include +#include +#include + +#include "src/ops/arithmetic_grad.h" +#include "src/ops/primitive_c.h" + +namespace mindspore { +namespace lite { +class MaximumGrad : public ArithmeticGrad { + public: +#ifdef PRIMITIVE_WRITEABLE + MS_DECLARE_PARENT(MaximumGrad, ArithmeticGrad); + MaximumGrad() = default; + explicit MaximumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {} + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; +#else + MaximumGrad() = default; + + int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; +#endif + int InferShape(std::vector inputs_, std::vector outputs_) override; +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_SRC_OPS_MAXIMUM_GRAD_H_ diff --git a/mindspore/lite/src/ops/minimum.cc b/mindspore/lite/src/ops/minimum.cc index a7126c9502..5881976ad3 100644 --- a/mindspore/lite/src/ops/minimum.cc +++ b/mindspore/lite/src/ops/minimum.cc @@ -23,6 +23,33 @@ namespace mindspore { namespace lite { #ifdef PRIMITIVE_WRITEABLE +int Minimum::UnPackAttr(const Primitive &prim, const std::vector &inputs) { + if (this->primitive_ == nullptr) { + this->primitive_ = new (std::nothrow) schema::PrimitiveT; + if (this->primitive_ == nullptr) { + MS_LOG(ERROR) << "new primitiveT failed"; + return RET_ERROR; + } + this->primitive_->value.type = schema::PrimitiveType_Minimum; + } + if (this->primitive_->value.type != schema::PrimitiveType_Minimum) { + MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; + return RET_ERROR; + } + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::MinimumT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } + } + return RET_OK; +} #else int Minimum::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { MS_ASSERT(nullptr != primitive); diff --git a/mindspore/lite/src/ops/minimum.h b/mindspore/lite/src/ops/minimum.h index 72f3ae26c2..de69645c70 100644 --- a/mindspore/lite/src/ops/minimum.h +++ b/mindspore/lite/src/ops/minimum.h @@ -14,8 +14,8 @@ * 
limitations under the License. */ -#ifndef LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ -#define LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ +#ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ +#define MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ #include #include @@ -32,6 +32,7 @@ class Minimum : public Arithmetic { #ifdef PRIMITIVE_WRITEABLE MS_DECLARE_PARENT(Arithmetic, Arithmetic); explicit Minimum(schema::PrimitiveT *primitive) : Arithmetic(primitive) {} + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; #else int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; #endif @@ -39,4 +40,4 @@ class Minimum : public Arithmetic { } // namespace lite } // namespace mindspore -#endif // LITE_MINDSPORE_LITE_C_OPS_MINIMUM_H_ +#endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_H_ diff --git a/mindspore/lite/src/ops/minimum_grad.cc b/mindspore/lite/src/ops/minimum_grad.cc new file mode 100644 index 0000000000..f4a03b7d95 --- /dev/null +++ b/mindspore/lite/src/ops/minimum_grad.cc @@ -0,0 +1,76 @@ +/** + * Copyright 2019-2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "include/errorcode.h" +#include "src/ops/minimum_grad.h" +#include "src/common/log_adapter.h" +#ifdef PRIMITIVE_WRITEABLE +#include +#include "tools/converter/quantizer/quantize_util.h" +#endif + +#ifndef PRIMITIVE_WRITEABLE +#include "src/ops/ops_register.h" +#endif + +namespace mindspore { +namespace lite { +#ifdef PRIMITIVE_WRITEABLE +int MinimumGrad::UnPackAttr(const Primitive &prim, const std::vector &inputs) { + if (this->primitive_ == nullptr) { + this->primitive_ = new (std::nothrow) schema::PrimitiveT; + if (this->primitive_ == nullptr) { + MS_LOG(ERROR) << "new primitiveT failed"; + return RET_ERROR; + } + this->primitive_->value.type = schema::PrimitiveType_MinimumGrad; + } + if (this->primitive_->value.type != schema::PrimitiveType_MinimumGrad) { + MS_LOG(ERROR) << "Primitive type is error :" << this->primitive_->value.type; + return RET_ERROR; + } + if (this->primitive_->value.value == nullptr) { + auto attr = new (std::nothrow) schema::MinimumGradT(); + if (attr == nullptr) { + MS_LOG(ERROR) << "new primitiveT value failed"; + return RET_ERROR; + } + this->primitive_->value.value = attr; + if (this->primitive_->value.value == nullptr) { + MS_LOG(ERROR) << "primitive value is nullptr"; + return RET_ERROR; + } + } + return RET_OK; +} + +#else +PrimitiveC *MinimumGradCreator(const schema::Primitive *primitive) { + return PrimitiveC::NewPrimitiveC(primitive); +} +Registry MinimumGradRegistry(schema::PrimitiveType_MinimumGrad, MinimumGradCreator); + +int MinimumGrad::UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) { + MS_ASSERT(nullptr != primitive); + MS_ASSERT(nullptr != fbb); + auto val_offset = schema::CreateMinimumGrad(*fbb); + auto prim_offset = schema::CreatePrimitive(*fbb, schema::PrimitiveType_MinimumGrad, val_offset.o); + fbb->Finish(prim_offset); + return RET_OK; +} +#endif +} // 
namespace lite +} // namespace mindspore diff --git a/mindspore/lite/src/ops/minimum_grad.h b/mindspore/lite/src/ops/minimum_grad.h new file mode 100644 index 0000000000..2d46776419 --- /dev/null +++ b/mindspore/lite/src/ops/minimum_grad.h @@ -0,0 +1,45 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ +#define MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ + +#include +#include +#include + +#include "src/ops/arithmetic_grad.h" +#include "src/ops/primitive_c.h" + +namespace mindspore { +namespace lite { +class MinimumGrad : public ArithmeticGrad { + public: +#ifdef PRIMITIVE_WRITEABLE + MS_DECLARE_PARENT(MinimumGrad, ArithmeticGrad); + MinimumGrad() = default; + explicit MinimumGrad(schema::PrimitiveT *primitive) : ArithmeticGrad(primitive) {} + int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; +#else + MinimumGrad() = default; + + int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; +#endif +}; +} // namespace lite +} // namespace mindspore + +#endif // MINDSPORE_LITE_SRC_OPS_MINIMUM_GRAD_H_ diff --git a/mindspore/lite/src/ops/primitive_c.cc b/mindspore/lite/src/ops/primitive_c.cc index 71f3235e48..5cef1e50d0 100644 --- a/mindspore/lite/src/ops/primitive_c.cc +++ b/mindspore/lite/src/ops/primitive_c.cc @@ -18,6 +18,7 @@ #ifdef PRIMITIVE_WRITEABLE #include #include + #include "tools/converter/quantizer/quantize_util.h" #include "src/ops/space_to_batch.h" #include "src/ops/space_to_batch_nd.h" @@ -167,12 +168,14 @@ #include "src/ops/sgd.h" #include "src/ops/adam.h" #include "src/ops/assign.h" +#include "src/ops/dropout_grad.h" +#include "src/ops/maximum_grad.h" +#include "src/ops/minimum_grad.h" #include "src/ops/control_depend.h" #include "src/ops/assign_add.h" #include "src/ops/binary_cross_entropy.h" #include "src/ops/binary_cross_entropy_grad.h" #endif - #endif namespace mindspore { namespace lite { @@ -506,10 +509,12 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Split") { return NewPrimitiveC(prim, inputs, quantType); - } else if (op_type == "While") { - return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "OneHot") { return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "Dropout") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "While") { + return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "GatherV2") { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "OnesLike") { @@ -537,7 +542,7 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: } else if ((op_type == "ReluGrad" || op_type == "ReLU6Grad" || op_type == "SigmoidGrad" || op_type == "HSigmoidGrad" || op_type == "HSwishGrad")) { return NewPrimitiveC(prim, inputs, quantType); - } else if ((op_type == "MaxPoolGrad") || (op_type == 
"MeanPoolGrad")) { + } else if ((op_type == "MaxPoolGrad") || (op_type == "MeanPoolGrad") || (op_type == "AvgPoolGradGpu")) { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Conv2DBackpropFilter") { return NewPrimitiveC(prim, inputs, quantType); @@ -559,6 +564,12 @@ std::shared_ptr PrimitiveC::Create(const Primitive &prim, const std: return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "Assign") { return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "DropoutGrad") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "MaximumGrad") { + return NewPrimitiveC(prim, inputs, quantType); + } else if (op_type == "MinimumGrad") { + return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "AssignAdd") { return NewPrimitiveC(prim, inputs, quantType); } else if (op_type == "BinaryCrossEntropy") { @@ -884,7 +895,12 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { return new BinaryCrossEntropyGrad(primitive); case schema::PrimitiveType_BinaryCrossEntropy: return new BinaryCrossEntropy(primitive); - + case schema::PrimitiveType_DropoutGrad: + return new DropoutGrad(primitive); + case schema::PrimitiveType_MaximumGrad: + return new MaximumGrad(primitive); + case schema::PrimitiveType_MinimumGrad: + return new MinimumGrad(primitive); #endif default: MS_LOG(ERROR) << "Unsupported primitive type in Create : " << schema::EnumNamePrimitiveType(op_type); @@ -892,6 +908,7 @@ PrimitiveC *PrimitiveC::Create(mindspore::schema::PrimitiveT *primitive) { } return nullptr; } + #else void PrimitiveC::SetQuantType(schema::QuantType quant_type) { this->quant_type_ = quant_type; } schema::QuantType PrimitiveC::GetQuantType() const { return quant_type_; } diff --git a/mindspore/lite/src/ops/squeeze.cc b/mindspore/lite/src/ops/squeeze.cc index 3e3d8589a7..eaa1c654dd 100644 --- a/mindspore/lite/src/ops/squeeze.cc +++ b/mindspore/lite/src/ops/squeeze.cc @@ -50,8 +50,7 @@ int Squeeze::UnPackAttr(const Primitive &prim, const std::vector &in MS_LOG(INFO) << "Squeeze's attr xis is set to default"; attr->axis = {0}; } else { - int axis = GetValue(prim.GetAttr("axis")); - attr->axis = {axis}; + attr->axis = GetValue>(prim.GetAttr("axis")); } this->primitive_->value.value = attr; } diff --git a/mindspore/lite/src/ops/sub.h b/mindspore/lite/src/ops/sub.h index 559c1df443..d431851ee3 100644 --- a/mindspore/lite/src/ops/sub.h +++ b/mindspore/lite/src/ops/sub.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef LITE_MINDSPORE_LITE_C_OPS_SUB_H_ -#define LITE_MINDSPORE_LITE_C_OPS_SUB_H_ +#ifndef MINDSPORE_LITE_SRC_OPS_SUB_H_ +#define MINDSPORE_LITE_SRC_OPS_SUB_H_ #include #include @@ -34,7 +34,6 @@ class Sub : public Arithmetic { explicit Sub(schema::PrimitiveT *primitive) : Arithmetic(primitive) {} void SetActivationType(int activation_type); int UnPackAttr(const Primitive &prim, const std::vector &inputs) override; - #else int UnPackToFlatBuilder(const schema::Primitive *primitive, flatbuffers::FlatBufferBuilder *fbb) override; #endif @@ -43,4 +42,4 @@ class Sub : public Arithmetic { } // namespace lite } // namespace mindspore -#endif // LITE_MINDSPORE_LITE_C_OPS_SUB_H_ +#endif // MINDSPORE_LITE_SRC_OPS_SUB_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc index fe3a955165..e55ab6efb2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc @@ -39,14 +39,6 @@ void FusedBatchnormCPUKernel::FreeScaleAndOffset() { free(offset_); offset_ = nullptr; } - if (save_mean_ != nullptr) { - free(save_mean_); - save_mean_ = nullptr; - } - if (save_variance_ != nullptr) { - free(save_variance_); - save_variance_ = nullptr; - } } int FusedBatchnormCPUKernel::InitConstTensor() { @@ -59,11 +51,8 @@ int FusedBatchnormCPUKernel::InitConstTensor() { offset_ = malloc(offset->Size()); mean_ = malloc(mean->Size()); variance_ = malloc(variance->Size()); - save_mean_ = malloc(mean->Size()); - save_variance_ = malloc(variance->Size()); - if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr || save_mean_ == nullptr || - save_variance_ == nullptr) { + if (scale_ == nullptr || offset_ == nullptr || mean_ == nullptr || variance_ == nullptr) { FreeMeanAndVariance(); FreeScaleAndOffset(); MS_LOG(ERROR) << "Memory allocation failed"; @@ -73,61 +62,64 @@ int FusedBatchnormCPUKernel::InitConstTensor() { memcpy(offset_, offset->MutableData(), offset->Size()); memcpy(mean_, mean->MutableData(), mean->Size()); memcpy(variance_, variance->MutableData(), variance->Size()); - memset(save_mean_, 0, mean->Size()); - memset(save_variance_, 0, variance->Size()); - if (out_tensors_.size() > 4) { - for (size_t i = 1; i < out_tensors_.size(); i++) { - auto *data = static_cast(out_tensors_[i]->MutableData()); - std::fill(data, data + out_tensors_[i]->ElementsNum(), 0.f); - } - } return RET_OK; } int FusedBatchnormCPUKernel::Run() { auto param = reinterpret_cast(op_parameter_); - if (is_train() && in_tensors_.size() >= 5) { + if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) { float *in = static_cast(in_tensors_[0]->MutableData()); float *scale = static_cast(in_tensors_[1]->MutableData()); - float *bias = static_cast(in_tensors_[2]->MutableData()); - float *mean = static_cast(in_tensors_[3]->MutableData()); - float *var = static_cast(in_tensors_[4]->MutableData()); - std::fill(mean, mean + in_tensors_[3]->ElementsNum(), 0.f); - std::fill(var, var + in_tensors_[4]->ElementsNum(), 0.f); - FusedBatchNormFp32MeanVar(in, mean, var, param, static_cast(save_mean_), - static_cast(save_variance_)); - memcpy(out_tensors_[3]->MutableData(), save_mean_, out_tensors_[3]->Size()); - memcpy(out_tensors_[4]->MutableData(), save_variance_, out_tensors_[3]->Size()); - memcpy(mean_, mean, in_tensors_[3]->Size()); - memcpy(variance_, var, in_tensors_[4]->Size()); + float *offset = 
static_cast(in_tensors_[2]->MutableData()); + float *current_mean = static_cast(mean_); + float *current_var = static_cast(variance_); + float *save_mean = static_cast(in_tensors_[3]->MutableData()); + float *save_variance = static_cast(in_tensors_[4]->MutableData()); + + std::fill(current_mean, current_mean + in_tensors_[3]->ElementsNum(), 0.f); + std::fill(current_var, current_var + in_tensors_[4]->ElementsNum(), 0.f); + FusedBatchNormFp32MeanVar(in, current_mean, current_var, param, static_cast(save_mean), + static_cast(save_variance)); + + memcpy(out_tensors_[1]->MutableData(), scale, out_tensors_[1]->Size()); + memcpy(out_tensors_[2]->MutableData(), offset, out_tensors_[2]->Size()); + memcpy(out_tensors_[3]->MutableData(), current_mean, out_tensors_[3]->Size()); + memcpy(out_tensors_[4]->MutableData(), current_var, out_tensors_[4]->Size()); + + // Copy to local variables memcpy(scale_, scale, in_tensors_[1]->Size()); - memcpy(offset_, bias, in_tensors_[2]->Size()); + memcpy(offset_, offset, in_tensors_[2]->Size()); + + // save for next iteration + memcpy(in_tensors_[3]->MutableData(), save_mean, in_tensors_[3]->Size()); + memcpy(in_tensors_[4]->MutableData(), save_variance, in_tensors_[4]->Size()); + trained_ = true; // trained at least once } auto ret = ParallelLaunch(this->context_->thread_pool_, BatchNormRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } + return ret; } -void FusedBatchnormCPUKernel::eval() { - LiteKernel::eval(); +int FusedBatchnormCPUKernel::Eval() { + LiteKernel::Eval(); if (trained_) { - float *run_mean = static_cast(in_tensors_[3]->MutableData()); - float *run_var = static_cast(in_tensors_[4]->MutableData()); + float *save_mean = static_cast(in_tensors_[3]->MutableData()); + float *save_var = static_cast(in_tensors_[4]->MutableData()); float *scale = static_cast(in_tensors_[1]->MutableData()); float *bias = static_cast(in_tensors_[2]->MutableData()); - // Copy to input tensors for Model export - memcpy(run_mean, save_mean_, in_tensors_[3]->Size()); - memcpy(run_var, save_variance_, in_tensors_[4]->Size()); + // Copy to local variables - memcpy(mean_, run_mean, in_tensors_[3]->Size()); - memcpy(variance_, run_var, in_tensors_[4]->Size()); memcpy(scale_, scale, in_tensors_[1]->Size()); memcpy(offset_, bias, in_tensors_[2]->Size()); + memcpy(mean_, save_mean, in_tensors_[3]->Size()); + memcpy(variance_, save_var, in_tensors_[4]->Size()); } + return RET_OK; } int FusedBatchnormCPUKernel::DoExecute(int task_id) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h index cea5a532d0..0265549c9f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.h @@ -29,7 +29,7 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { : BatchnormCPUKernel(parameter, inputs, outputs, ctx, primitive) {} ~FusedBatchnormCPUKernel() { FreeScaleAndOffset(); } - void eval() override; + int Eval() override; int ReSize() override; int Run() override; int InitConstTensor() override; @@ -39,8 +39,6 @@ class FusedBatchnormCPUKernel : public BatchnormCPUKernel { void FreeScaleAndOffset(); void *scale_ = nullptr; void *offset_ = nullptr; - void *save_mean_ = nullptr; - void *save_variance_ = nullptr; bool trained_ = false; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc 
b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc index c0d37b7dfd..d0c14e244b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc @@ -140,10 +140,12 @@ int MatmulCPUKernel::InitBias() { : (c_shape[c_shape.size() - 1]); params_->col_8_ = UP_ROUND(params_->col_, 8); auto col_tmp = is_vector_a_ ? params_->col_ : params_->col_8_; - bias_ptr_ = reinterpret_cast(malloc(col_tmp * sizeof(float))); if (bias_ptr_ == nullptr) { - FreeTmpBuffer(); - return RET_MEMORY_FAILED; + bias_ptr_ = reinterpret_cast(malloc(col_tmp * sizeof(float))); + if (bias_ptr_ == nullptr) { + FreeTmpBuffer(); + return RET_MEMORY_FAILED; + } } memset(bias_ptr_, 0, col_tmp * sizeof(float)); if (in_tensors_.size() == 3) { @@ -154,6 +156,8 @@ int MatmulCPUKernel::InitBias() { int MatmulCPUKernel::ReSize() { if (!params_->b_const_) { + free(bias_ptr_); + bias_ptr_ = nullptr; auto ret = InitBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Matmul fp32 init bias failed"; @@ -277,7 +281,7 @@ int MatmulCPUKernel::Run() { auto b_src = reinterpret_cast(in_tensors_[1]->data_c()); auto c_src = reinterpret_cast(out_tensors_[0]->data_c()); - if (!params_->a_const_ || is_train()) { + if (!params_->a_const_ || IsTrain()) { if (a_pack_ptr_ != nullptr) { params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_); a_pack_ptr_ = nullptr; @@ -294,7 +298,7 @@ int MatmulCPUKernel::Run() { a_ptr_ = a_pack_ptr_; } } - if (!params_->b_const_ || is_train()) { + if (!params_->b_const_ || IsTrain()) { if (b_pack_ptr_ != nullptr) { params_->b_const_ ? free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_); b_pack_ptr_ = nullptr; @@ -311,7 +315,9 @@ int MatmulCPUKernel::Run() { b_ptr_ = b_pack_ptr_; } } - + if (IsTrain()) { + InitBias(); + } for (int i = 0; i < params_->batch; ++i) { if (is_vector_a_) { cur_a_ptr_ = a_ptr_ + i * params_->deep_; @@ -329,26 +335,54 @@ int MatmulCPUKernel::Run() { return RET_ERROR; } } - if (!params_->a_const_ || is_train()) { - context_->allocator->Free(a_pack_ptr_); + if (!params_->a_const_ || IsTrain()) { + params_->a_const_ ? free(a_pack_ptr_) : context_->allocator->Free(a_pack_ptr_); a_pack_ptr_ = nullptr; } - if (!params_->b_const_ || is_train()) { - context_->allocator->Free(b_pack_ptr_); + if (!params_->b_const_ || IsTrain()) { + params_->b_const_ ? 
free(b_pack_ptr_) : context_->allocator->Free(b_pack_ptr_); b_pack_ptr_ = nullptr; } return RET_OK; } -void MatmulCPUKernel::eval() { +int MatmulCPUKernel::Eval() { // Copy weights after training - LiteKernel::eval(); + auto a_src = reinterpret_cast(in_tensors_[0]->data_c()); + auto b_src = reinterpret_cast(in_tensors_[1]->data_c()); + LiteKernel::Eval(); if (params_->a_const_) { - InitMatrixA(reinterpret_cast(in_tensors_[0]->MutableData()), a_pack_ptr_); + if (a_pack_ptr_ == nullptr) { + auto ret = MallocMatrixABuffer(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Matmul fp32 malloc matrix a buffer failed"; + return RET_ERROR; + } + } + if (is_vector_a_) { + a_ptr_ = a_src; + } else { + InitMatrixA(a_src, a_pack_ptr_); + a_ptr_ = a_pack_ptr_; + } } if (params_->b_const_) { - InitMatrixB(reinterpret_cast(in_tensors_[1]->MutableData()), b_pack_ptr_); + if (b_pack_ptr_ == nullptr) { + auto ret = MallocMatrixBBuffer(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Matmul fp32 malloc matrix b buffer failed"; + return RET_ERROR; + } + } + if (is_vector_a_ && params_->b_transpose_) { + b_ptr_ = b_src; + } else { + InitMatrixB(b_src, b_pack_ptr_); + b_ptr_ = b_pack_ptr_; + } } + InitBias(); + return RET_OK; } kernel::LiteKernel *CpuMatmulFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h index 68ca588761..f48e53d608 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.h @@ -34,7 +34,7 @@ class MatmulCPUKernel : public MatmulBaseCPUKernel { int ReSize() override; int Run() override; int RunImpl(int task_id); - void eval() override; + int Eval() override; private: int MallocMatrixABuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc index 619e33d03b..4b452297cd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/one_hot_fp32.cc @@ -214,5 +214,5 @@ kernel::LiteKernel *CpuOneHotFp32KernelCreator(const std::vector return kernel; } -REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_OneHot, CpuOneHotFp32KernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc index 81cfa992d0..65572eebf0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/adam.cc @@ -45,24 +45,19 @@ int AdamCPUKernel::Execute(int task_id) { auto eps = reinterpret_cast(in_tensors_[8]->MutableData())[0]; auto gradient = reinterpret_cast(in_tensors_[9]->MutableData()); size_t elem_num = in_tensors_[0]->ElementsNum(); + auto update_lr = learning_rate * std::sqrt(1 - beta2_power) / (1 - beta1_power); if (adam_param_->use_nesterov_) { // Nadam for (size_t i = 0; i < elem_num; ++i) { - m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1)); - v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2)); - auto g_hat = gradient[i] / (1 - beta1_power); - auto m_hat = m[i] / (1 - beta1_power); - auto v_hat = v[i] / (1 - beta2_power); - auto m_tag = (1.f - beta1) * g_hat + beta1 * m_hat; - weight[i] -= learning_rate * m_tag / (sqrtf(v_hat) + eps); + m[i] += (gradient[i] - m[i]) * (1 - beta1); + v[i] += (gradient[i] * 
gradient[i] - v[i]) * (1 - beta2); + weight[i] -= update_lr * (m[i] * beta1 + (1 - beta1) * gradient[i]) / (std::sqrt(v[i]) + eps); } } else { for (size_t i = 0; i < elem_num; ++i) { - m[i] = (m[i] * beta1) + (gradient[i] * (1.f - beta1)); - v[i] = (v[i] * beta2) + (gradient[i] * gradient[i] * (1.f - beta2)); - auto m_hat = m[i] / (1 - beta1_power); - auto v_hat = v[i] / (1 - beta2_power); - weight[i] -= learning_rate * m_hat / (sqrtf(v_hat) + eps); + m[i] += (gradient[i] - m[i]) * (1 - beta1); + v[i] += (gradient[i] * gradient[i] - v[i]) * (1 - beta2); + weight[i] -= update_lr * m[i] / (std::sqrt(v[i]) + eps); } } return RET_OK; }
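The rewritten Adam kernel above folds the per-element bias corrections (m_hat, v_hat) into a single precomputed step size, update_lr = lr * sqrt(1 - beta2^t) / (1 - beta1^t), and states the moment EMAs in incremental form. A small self-contained check of why the two formulations agree (illustrative only, not part of the patch):

```cpp
#include <cmath>
#include <cstdio>

// Hedged sketch: the two Adam formulations are algebraically equal.
// Classic: m_hat = m/(1-b1^t), v_hat = v/(1-b2^t), w -= lr*m_hat/(sqrt(v_hat)+eps)
// Folded:  update_lr = lr*sqrt(1-b2^t)/(1-b1^t),   w -= update_lr*m/(sqrt(v)+eps)
// They coincide exactly when eps is also rescaled by sqrt(1-b2^t); with a
// plain eps the discrepancy is O(eps), which is why the kernel treats them
// as equivalent.
int main() {
  const float lr = 0.01f, beta1 = 0.9f, beta2 = 0.999f, eps = 1e-8f;
  float m = 0.f, v = 0.f;
  const float g = 0.5f;
  const float beta1_power = beta1, beta2_power = beta2;  // t = 1

  m += (g - m) * (1 - beta1);      // same as m = beta1*m + (1-beta1)*g
  v += (g * g - v) * (1 - beta2);  // same as v = beta2*v + (1-beta2)*g*g

  const float m_hat = m / (1 - beta1_power);
  const float v_hat = v / (1 - beta2_power);
  const float classic = lr * m_hat / (std::sqrt(v_hat) + eps);

  const float update_lr = lr * std::sqrt(1 - beta2_power) / (1 - beta1_power);
  const float folded = update_lr * m / (std::sqrt(v) + eps);

  std::printf("classic=%.9f folded=%.9f\n", classic, folded);
  return 0;
}
```

The Nesterov branch applies the same folded step size to the lookahead combination m[i] * beta1 + (1 - beta1) * gradient[i] instead of m[i] alone.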
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc index e847c3cf64..833f022522 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.cc @@ -177,6 +177,28 @@ void ArithmeticGradCPUKernel::ArithmeticGradDiv2L(float *dy, int dy_size, float ElementDivNegSquare(tile_data2, x2_data, dx2, dy_size); } +void ArithmeticGradCPUKernel::ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + // For some reason, input order is x0, x1, dy + auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); + auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); + dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData()); + + MaximumByAxes(x1, x2, dy, arithmeticParameter_->in_shape0_, arithmeticParameter_->in_shape1_, + arithmeticParameter_->out_shape_, dx1, dx2, arithmeticParameter_->ndim_); +} + +void ArithmeticGradCPUKernel::ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, + int dx2_size) { + // For some reason, input order is x0, x1, dy + auto x1 = reinterpret_cast<float *>(in_tensors_[0]->MutableData()); + auto x2 = reinterpret_cast<float *>(in_tensors_[1]->MutableData()); + dy = reinterpret_cast<float *>(in_tensors_[2]->MutableData()); + + MinimumByAxes(x1, x2, dy, arithmeticParameter_->out_shape_, arithmeticParameter_->in_shape0_, + arithmeticParameter_->in_shape1_, dx1, dx2, arithmeticParameter_->ndim_); +} + int ArithmeticGradCPUKernel::ReSize() { return RET_OK; } int ArithmeticGradCPUKernel::Execute(int task_id) { @@ -240,4 +262,6 @@ REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MulGrad, CpuArithmeticGradFp3 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_AddGrad, CpuArithmeticGradFp32KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_SubGrad, CpuArithmeticGradFp32KernelCreator) REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DivGrad, CpuArithmeticGradFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MaximumGrad, CpuArithmeticGradFp32KernelCreator) +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_MinimumGrad, CpuArithmeticGradFp32KernelCreator) } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h index 7f480daf4c..c11d01f079 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/arithmetic_grad.h @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_AddGrad; using mindspore::schema::PrimitiveType_DivGrad; +using mindspore::schema::PrimitiveType_MaximumGrad; +using mindspore::schema::PrimitiveType_MinimumGrad; using mindspore::schema::PrimitiveType_MulGrad; using mindspore::schema::PrimitiveType_SubGrad; @@ -52,6 +54,12 @@ class ArithmeticGradCPUKernel : public LiteKernel { case PrimitiveType_DivGrad: arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradDiv; // this will be adjusted in InferShape break; + case PrimitiveType_MaximumGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMaximum; + break; + case PrimitiveType_MinimumGrad: + arithmetic_grad_ = &ArithmeticGradCPUKernel::ArithmeticGradMinimum; + break; default: MS_LOG(ERROR) << "Error Operator type " << parameter->type_; break; @@ -79,6 +87,8 @@ class ArithmeticGradCPUKernel : public LiteKernel { void ArithmeticGradDiv(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); void ArithmeticGradDiv1L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); void ArithmeticGradDiv2L(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradMaximum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); + void ArithmeticGradMinimum(float *dy, int dy_size, float *dx1, int dx1_size, float *dx2, int dx2_size); ArithmeticParameter *arithmeticParameter_; ArithmeticGradOperation arithmetic_grad_; float *tile_data0; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc index c821003cec..12c8002b70 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/bn_grad.cc @@ -15,6 +15,7 @@ */ #include "src/runtime/kernel/arm/fp32_grad/bn_grad.h" +#include #include #include #include "schema/model_generated.h" @@ -34,7 +35,7 @@ namespace mindspore::kernel { int BNGradCPUKernel::Init() { auto *input_x = in_tensors_.at(1); int channels = input_x->shape().at(kNHWC_C); - SetWorkspaceSize(4 * channels * sizeof(float)); + SetWorkspaceSize(2 * channels * sizeof(float)); return RET_OK; } @@ -45,19 +46,23 @@ int BNGradCPUKernel::Execute(int task_id) { auto *input_yt = in_tensors_.at(0); auto *input_x = in_tensors_.at(1); auto *input_scale = in_tensors_.at(2); + auto *input_mean = in_tensors_.at(3); + auto *input_var = in_tensors_.at(4); + + float *save_mean = reinterpret_cast<float *>(input_mean->MutableData()); + float *save_var = reinterpret_cast<float *>(input_var->MutableData()); + auto *output_dx = out_tensors_.at(0); auto *output_scale = out_tensors_.at(1); auto *output_bias = out_tensors_.at(2); - int batch = input_x->Batch(); - int channels = input_x->Channel(); - int spatial = input_x->Height() * input_x->Width(); + size_t batch = input_x->Batch(); + size_t channels = input_x->Channel(); + size_t spatial = input_x->Height() * input_x->Width(); float eps = bn_param->epsilon_; float *workspace = static_cast<float *>(GetWorkspace()); std::fill(workspace, workspace + GetWorkspaceSize() / sizeof(*workspace), 0.f); - float *mean = workspace; - float *invar = mean + channels; - float *dxhat_sum = invar + channels; + float *dxhat_sum = workspace; float *dxhathat_sum = dxhat_sum + channels; float *x = reinterpret_cast<float *>(input_x->MutableData()); @@ -67,11 +72,14 @@ int BNGradCPUKernel::Execute(int task_id) { float *dscale = reinterpret_cast<float *>(output_scale->MutableData()); float *dbias = reinterpret_cast<float *>(output_bias->MutableData()); - backwardX(x, yt, scale, batch * spatial, channels, eps, mean, invar, dxhat_sum, dxhathat_sum, dx); + var2Invar(save_var, input_var->ElementsNum(), eps); + // dx + backwardX(x, yt, scale, batch * spatial, channels, save_mean, save_var, dxhat_sum,
dxhathat_sum, dx); // dbias sumSpatialBatch(yt, batch * spatial, channels, dbias); // dscale - backwardScale(x, mean, invar, yt, batch, channels, spatial, dscale); + backwardScale(x, save_mean, save_var, yt, batch, channels, spatial, dscale); + return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc index e3f27962ba..af2f29b4ef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.cc @@ -19,6 +19,7 @@ #include "nnacl/fp32_grad/gemm.h" #include "include/errorcode.h" #include "src/runtime/runtime_api.h" +#include "nnacl/pack.h" using mindspore::kernel::KERNEL_ARCH::kCPU; using mindspore::lite::RET_ERROR; @@ -26,8 +27,8 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { int ConvolutionTrainCPUKernel::Init() { - if (2 != in_tensors_.size()) { - MS_LOG(ERROR) << "Convolution should have two inputs"; + if (2 > in_tensors_.size()) { + MS_LOG(ERROR) << "Convolution should have at least two inputs"; return RET_ERROR; } if (1 != out_tensors_.size()) { @@ -51,11 +52,11 @@ int ConvolutionTrainCPUKernel::Init() { conv_param_->kernel_w_ = input_weight->shape().at(kNHWC_W); conv_param_->group_ = (conv_param_->group_ == 0) ? conv_param_->input_channel_ : conv_param_->group_; - - int ws_size = conv_param_->output_h_ * conv_param_->output_w_ * conv_param_->kernel_h_ * conv_param_->kernel_w_ * - conv_param_->input_channel_ / conv_param_->group_; - - SetWorkspaceSize(ws_size * sizeof(float)); + const int n = conv_param_->output_channel_ * conv_param_->group_; + const int k = conv_param_->kernel_h_ * conv_param_->kernel_w_ * conv_param_->input_channel_ / conv_param_->group_; + ws_size = chunk * k; + int mat_alloc = MatSizeTotal(chunk, n, k, 0); + SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); return RET_OK; } @@ -71,36 +72,35 @@ int ConvolutionTrainCPUKernel::Execute(int task_id) { auto y_addr = reinterpret_cast(out_y->MutableData()); auto w_addr = reinterpret_cast(input_w->MutableData()); - int i, j; - int nweights = input_w->ElementsNum(); - int in_ch = conv_param_->input_channel_; - int in_h = conv_param_->input_h_; - int in_w = conv_param_->input_w_; - int k_h = conv_param_->kernel_h_; - int k_w = conv_param_->kernel_w_; - int batch = conv_param_->output_batch_; - int out_ch = conv_param_->output_channel_; // out_y->shape()[3]; - int groups = conv_param_->group_; - int out_h = conv_param_->output_h_; - int out_w = conv_param_->output_w_; - int m = out_h * out_w; - int n = out_ch / groups; - int k = k_h * k_w * in_ch / groups; + const int nweights = input_w->ElementsNum(); + const int in_ch = conv_param_->input_channel_; + const int in_h = conv_param_->input_h_; + const int in_w = conv_param_->input_w_; + const int k_h = conv_param_->kernel_h_; + const int k_w = conv_param_->kernel_w_; + const int batch = conv_param_->output_batch_; + const int out_ch = conv_param_->output_channel_; // out_y->shape()[3]; + const int groups = conv_param_->group_; + const int out_h = conv_param_->output_h_; + const int out_w = conv_param_->output_w_; + const int m = out_h * out_w; + const int n = out_ch / groups; + const int k = k_h * k_w * in_ch / groups; float *workspace = static_cast(GetWorkspace()); - - memset(y_addr, 0, out_y->Size()); - - for (i = 0; i < batch; ++i) { - for (j = 0; j < groups; ++j) { - float *mat_a = workspace; - float *mat_b = w_addr + j * nweights / groups; - float *mat_c = y_addr + (i * 
groups) * n * m + j * (out_ch / groups); - float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups); - im2col_hwc(im, mat_a, conv_param_); - gemm(0, 1, m, n, k, 1, mat_a, k, mat_b, k, 1, mat_c, out_ch); + float *mat_workspace = workspace + ws_size; + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < groups; ++j) { + for (int ci = 0; ci < m; ci += chunk) { + int real_chunk = MSMIN(m - ci, chunk); + float *mat_a = workspace; + const float *mat_b = w_addr + j * nweights / groups; + float *mat_c = y_addr + (i * groups) * n * m + j * (out_ch / groups) + ci * out_ch; + float *im = x_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups); + RollingIm2ColPackUnitFp32(im, conv_param_, mat_a, real_chunk, ci); + GemmMatmul(0, 1, real_chunk, n, k, 1, mat_a, k, mat_b, k, 0, mat_c, out_ch, mat_workspace); + } } } - return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h index dd92d28183..dd212e7f87 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution.h @@ -33,6 +33,14 @@ class ConvolutionTrainCPUKernel : public LiteKernel { int ReSize() override; int Run() override; int Execute(int task_id); + + private: + int ws_size = 0; +#ifdef ENABLE_ARM32 + const int chunk = C4NUM; +#else + const int chunk = C12NUM; +#endif }; kernel::LiteKernel *CpuConvTrainFp32KernelCreator(const std::vector &inputs, diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc index c72306b009..5c73bbfe63 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.cc @@ -51,10 +51,12 @@ int ConvolutionGradFilterCPUKernel::Init() { conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; - size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * - conv_param->input_channel_ / conv_param->group_; + ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; - SetWorkspaceSize(ws_size * sizeof(float)); + int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; + int k = conv_param->output_channel_ / conv_param->group_; + size_t mat_alloc = MatSizeTotal(k, n, chunk, n); + SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); return RET_OK; } @@ -88,19 +90,21 @@ int ConvolutionGradFilterCPUKernel::Execute(int task_id) { int k = out_ch / groups; float *workspace = reinterpret_cast(GetWorkspace()); - + float *mat_workspace = workspace + ws_size; // zero out pointer memset(dw_addr, 0, out_dw->Size()); - for (i = 0; i < batch; ++i) { for (j = 0; j < groups; ++j) { - float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups); - float *mat_b = workspace; - float *mat_c = dw_addr + j * nweights / groups; - float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups); - - im2row_hwc(im, mat_b, conv_param, false); - gemm(1, 1, k, n, m, 1, mat_a, out_ch, mat_b, m, 1, mat_c, n); + for (int ci = 0; ci < m; ci += chunk) { + int real_chunk = MSMIN(m - ci, chunk); + float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch; + float *mat_b = 
workspace; + float *mat_c = dw_addr + j * nweights / groups; + float *im = x_addr + (i * in_ch * in_h * in_w) + j * (in_ch / groups); + memset(mat_b, 0, n * real_chunk * sizeof(float)); + RollingIm2ColPackUnitFp32(im, conv_param, mat_b, real_chunk, ci); + GemmMatmul(1, 0, k, n, real_chunk, 1, mat_a, out_ch, mat_b, n, 1, mat_c, n, mat_workspace); + } } } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h index a8eaefdafc..763abc7612 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_filter.h @@ -34,6 +34,14 @@ class ConvolutionGradFilterCPUKernel : public LiteKernel { int ReSize() override; int Run() override; int Execute(int task_id); + + private: + size_t ws_size = 0; +#ifdef ENABLE_ARM32 + const int chunk = C4NUM; +#else + const int chunk = C12NUM; +#endif }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc index e7bbd1ce5b..72082b5066 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.cc @@ -51,11 +51,14 @@ int ConvolutionGradInputCPUKernel::Init() { conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; + ws_size = chunk * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_ / conv_param->group_; - size_t ws_size = conv_param->output_h_ * conv_param->output_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * - conv_param->input_channel_ / conv_param->group_; + int n = conv_param->kernel_w_ * conv_param->kernel_h_ * conv_param->input_channel_ / conv_param->group_; + int k = conv_param->output_channel_ / conv_param->group_; - SetWorkspaceSize(ws_size * sizeof(float)); + size_t mat_alloc = MatSizeTotal(chunk, n, k, 0); + + SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); return RET_OK; } @@ -88,16 +91,30 @@ int ConvolutionGradInputCPUKernel::Execute(int task_id) { int n = k_w * k_h * in_ch / groups; int k = out_ch / groups; float *workspace = reinterpret_cast(GetWorkspace()); - + float *mat_workspace = workspace + ws_size; memset(dx_addr, 0, sizeof(float) * batch * in_ch * in_h * in_w); - for (i = 0; i < batch; ++i) { for (j = 0; j < groups; ++j) { - float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups); - float *mat_b = w_addr + j * nweights / groups; - float *mat_c = workspace; - gemm(0, 0, m, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n); - col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), conv_param); + GemmCb gcb; + for (int ci = 0; ci < m; ci += chunk) { + float *mat_b; + if (ci == 0) { + mat_b = w_addr + j * nweights / groups; + gcb.ca = 0; + gcb.cb = 0; + gcb.bias = nullptr; + gcb.atype = ActType_No; + } else { + mat_b = gcb.mat_b; + gcb.cb = 1; + } + int real_chunk = MSMIN(m - ci, chunk); + float *mat_a = dy_addr + (i * groups) * m * k + j * (out_ch / groups) + ci * out_ch; + float *mat_c = workspace; + GemmMatmulPlus(0, 0, real_chunk, n, k, 1, mat_a, out_ch, mat_b, n, 0, mat_c, n, mat_workspace, &gcb); + rolling_col2im_hwc(mat_c, dx_addr + (i * groups) * (in_ch / groups) * in_h * in_w + j * (in_ch / groups), + conv_param, real_chunk, ci); + } } } 
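The training convolution and its filter/input gradient kernels above all move from a one-shot im2col over the whole output plane to a rolling scheme: at most `chunk` output rows (C12NUM, or C4NUM on ARM32) are unpacked per GemmMatmul call, so the workspace shrinks from m * k floats to chunk * k plus the GEMM scratch reported by MatSizeTotal. A hedged, self-contained sketch of the resulting workspace arithmetic (the function below is an illustration, not one of the kernels' actual helpers):

```cpp
#include <algorithm>
#include <cstddef>

// Hedged sketch of the workspace math behind the chunked scheme: instead of
// materialising the whole im2col matrix (m x k floats), the kernel expands
// at most `chunk` output rows at a time, so the im2col part of the workspace
// only needs chunk * k floats. The GEMM scratch (MatSizeTotal in the patch)
// is added on top by the kernels' Init() methods.
size_t RollingWorkspaceFloats(int out_h, int out_w, int kernel_h, int kernel_w,
                              int in_ch, int group, int chunk) {
  const int k = kernel_h * kernel_w * in_ch / group;  // length of one im2col row
  const int m = out_h * out_w;                        // total output rows
  const int rows = std::min(m, chunk);                // rows expanded at once
  return static_cast<size_t>(rows) * k;               // vs. m * k previously
}
```

The inner loops then step `ci` through the m output rows in increments of `chunk`, clamping the last iteration with MSMIN(m - ci, chunk), exactly as in the hunks above.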
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h index 6bea61b59c..d4b226dd9b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/convolution_grad_input.h @@ -33,6 +33,14 @@ class ConvolutionGradInputCPUKernel : public LiteKernel { int ReSize() override; int Run() override; int Execute(int task_id); + + private: + size_t ws_size = 0; +#ifdef ENABLE_ARM32 + const int chunk = C4NUM; +#else + const int chunk = C12NUM; +#endif }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc index 0133ffb0f4..34e425d10c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.cc @@ -50,10 +50,14 @@ int DeConvolutionGradFilterCPUKernel::Init() { conv_param->output_h_ = dy_tensor->shape()[kNHWC_H]; conv_param->output_w_ = dy_tensor->shape()[kNHWC_W]; - int ws_size = conv_param->input_h_ * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * - conv_param->output_channel_ / conv_param->group_; + ws_size = chunk * conv_param->input_w_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ / + conv_param->group_; - SetWorkspaceSize(ws_size * sizeof(float)); + int m = conv_param->input_channel_ / conv_param->group_; + int n = conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->output_channel_ / conv_param->group_; + size_t mat_alloc = MatSizeTotal(n, m, chunk * conv_param->input_w_, conv_param->input_channel_); + + SetWorkspaceSize((ws_size + mat_alloc) * sizeof(float)); return RET_OK; } @@ -82,21 +86,25 @@ int DeConvolutionGradFilterCPUKernel::Execute(int task_id) { int out_h = conv_param->output_h_; int out_w = conv_param->output_w_; - int m = in_ch / groups; - int n = k_h * k_w * out_ch / groups; - int k = in_h * in_w; + const int m = in_ch / groups; + const int n = k_h * k_w * out_ch / groups; float *workspace = reinterpret_cast(GetWorkspace()); + float *mat_workspace = workspace + ws_size; // zero out pointer memset(dw_addr, 0, out_dw->Size()); for (i = 0; i < batch; ++i) { for (j = 0; j < groups; ++j) { - float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)); - float *mat_b = workspace; - float *mat_c = dw_addr + j * m; - float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups)); - im2row_hwc(im, mat_b, conv_param, true); - gemm(0, 0, n, m, k, 1, mat_b, k, mat_a, in_ch, 1, mat_c, in_ch); + for (int ci = 0; ci < in_h; ci += chunk) { + int real_chunk = MSMIN(in_h - ci, chunk); + float *mat_a = x_addr + (i * (in_ch * in_h * in_w) + j * (in_ch / groups)) + ci * in_w * in_ch; + float *mat_b = workspace; + float *mat_c = dw_addr + j * m; + float *im = dy_addr + (i * (out_h * out_w * out_ch) + j * (out_ch / groups)); + rolling_im2row_hwc(im, mat_b, conv_param, real_chunk, ci); + GemmMatmul(0, 0, n, m, real_chunk * in_w, 1, mat_b, real_chunk * in_w, mat_a, in_ch, 1, mat_c, in_ch, + mat_workspace); + } } } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h index 0737cb1009..a95b4e484a 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_filter.h @@ -33,6 +33,10 @@ class DeConvolutionGradFilterCPUKernel : public LiteKernel { int ReSize() override; int Run() override; int Execute(int task_id); + + private: + size_t ws_size = 0; + const int chunk = 1; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.cc new file mode 100644 index 0000000000..7fa2eafa8b --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.cc @@ -0,0 +1,131 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <random> +#include <algorithm> +#include "src/runtime/kernel/arm/fp32_grad/dropout.h" +#include "schema/model_generated.h" +#include "src/runtime/runtime_api.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "nnacl/fp32_grad/dropout_parameter.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Dropout; + +namespace mindspore::kernel { + +int DropoutCPUKernel::Init() { + auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); + if (param == nullptr) { + MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; + return RET_NULL_PTR; + } + + if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) { + MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero and one"; + return RET_ERROR; + } + + if (param->ratio_ >= 1.0f) { + scale_ = 1.0f; + } else { + scale_ = 1. / (1.
- param->ratio_); + } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int DropoutCPUKernel::ReSize() { return RET_OK; } + +int DropoutCPUKernel::Execute(int task_id) { + auto input_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); + auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); + auto mask = reinterpret_cast<float *>(out_tensors_.at(1)->MutableData()); + auto length = in_tensors_.at(kInputIndex)->ElementsNum(); + auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); + if (param == nullptr) { + MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; + return RET_NULL_PTR; + } + if (IsEval()) { + std::copy(input_ptr, input_ptr + length, output_ptr); + } else { + std::default_random_engine generator; + std::bernoulli_distribution distribution(param->ratio_); + + for (int i = 0; i < length; i++) { + mask[i] = distribution(generator); + output_ptr[i] = input_ptr[i] * mask[i] * scale_; + } + } + return RET_OK; +} + +int RunDropout(void *cdata, int task_id) { + auto dropout = reinterpret_cast<DropoutCPUKernel *>(cdata); + auto error_code = dropout->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Dropout Run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int DropoutCPUKernel::Run() { + int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropout, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Dropout function error, error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +kernel::LiteKernel *CpuDropoutFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, + const std::vector<lite::Tensor *> &outputs, OpParameter *opParameter, + const lite::InnerContext *ctx, const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + if (opParameter == nullptr) { + MS_LOG(ERROR) << "Dropout opParameter nullptr."; + return nullptr; + } + if (desc.type != schema::PrimitiveType_Dropout) { + MS_LOG(ERROR) << "Dropout desc type should be " << schema::PrimitiveType_Dropout << " got " << desc.type; + return nullptr; + } + auto *kernel = new (std::nothrow) DropoutCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "Dropout new kernel failed."; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + delete kernel; + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_Dropout, CpuDropoutFp32KernelCreator) +} // namespace mindspore::kernel
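The Dropout kernel implements "inverted" dropout: the mask is drawn per element and the survivors are pre-scaled by scale_ = 1/(1 - ratio) during training, so that eval mode can reduce to a plain copy with no rescaling. A self-contained sketch of that scheme, assuming `drop_ratio` denotes the probability of zeroing an element (the exact semantics of the patch's GetRatio() are not restated here):

```cpp
#include <algorithm>
#include <random>
#include <vector>

// Hedged sketch of inverted dropout. Scaling the surviving elements by
// 1/(1 - drop_ratio) keeps E[output] == input, which is what lets the eval
// path be a straight copy.
void InvertedDropout(const std::vector<float> &in, std::vector<float> *out,
                     std::vector<float> *mask, float drop_ratio, bool training) {
  out->resize(in.size());
  mask->resize(in.size());
  if (!training) {
    std::copy(in.begin(), in.end(), out->begin());
    return;
  }
  std::default_random_engine generator;
  std::bernoulli_distribution keep(1.0f - drop_ratio);  // true => keep element
  const float scale = 1.0f / (1.0f - drop_ratio);
  for (size_t i = 0; i < in.size(); ++i) {
    (*mask)[i] = keep(generator) ? 1.0f : 0.0f;
    (*out)[i] = in[i] * (*mask)[i] * scale;
  }
}
```

The mask written to the second output tensor is exactly what the DropoutGrad kernel below consumes, so the backward pass applies the same mask and scale to the incoming gradient.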
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.h new file mode 100644 index 0000000000..2b4093a73b --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ + +#include <vector> +#include "src/lite_kernel.h" + +namespace mindspore::kernel { +class DropoutCPUKernel : public LiteKernel { + public: + DropoutCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs, + const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + + ~DropoutCPUKernel() override = default; + + int Init() override; + int ReSize() override; + int Run() override; + int Execute(int task_id); + + private: + float scale_; +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.cc new file mode 100644 index 0000000000..bb62ba40f8 --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.cc @@ -0,0 +1,118 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "src/runtime/kernel/arm/fp32_grad/dropout_grad.h" +#include "nnacl/fp32_grad/dropout_grad.h" +#include "schema/model_generated.h" +#include "src/runtime/runtime_api.h" +#include "src/kernel_registry.h" +#include "include/errorcode.h" +#include "nnacl/fp32_grad/dropout_parameter.h" + +using mindspore::kernel::KERNEL_ARCH::kCPU; +using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_DropoutGrad; + +namespace mindspore::kernel { + +int DropoutGradCPUKernel::Init() { + auto param = reinterpret_cast<DropoutParameter *>(op_parameter_); + if (param == nullptr) { + MS_LOG(ERROR) << "Dropout op_parameter_ nullptr"; + return RET_NULL_PTR; + } + + if ((param->ratio_ > 1.0f) || (param->ratio_ < 0.0f)) { + MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero and one"; + return RET_ERROR; + } + + if (param->ratio_ >= 1.0f) { + scale_ = 1.0f; + } else { + scale_ = 1. / (1.
- param->ratio_); + } + if (!InferShapeDone()) { + return RET_OK; + } + return ReSize(); +} + +int DropoutGradCPUKernel::ReSize() { return RET_OK; } + +int DropoutGradCPUKernel::Execute(int task_id) { + auto yt_ptr = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->MutableData()); + auto mask_ptr = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData()); + auto output_ptr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData()); + auto length = in_tensors_.at(kInputIndex)->ElementsNum(); + DropoutGrad(yt_ptr, mask_ptr, output_ptr, length, scale_); + + return RET_OK; +} + +int RunDropoutGrad(void *cdata, int task_id) { + auto dropout = reinterpret_cast<DropoutGradCPUKernel *>(cdata); + auto error_code = dropout->Execute(task_id); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Dropout Grad Run error task_id[" << task_id << "] error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +int DropoutGradCPUKernel::Run() { + int error_code = ParallelLaunch(this->context_->thread_pool_, RunDropoutGrad, this, 1); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "Dropout Grad function error, error_code[" << error_code << "]"; + return RET_ERROR; + } + return RET_OK; +} + +kernel::LiteKernel *CpuDropoutGradFp32KernelCreator(const std::vector<lite::Tensor *> &inputs, + const std::vector<lite::Tensor *> &outputs, + OpParameter *opParameter, const lite::InnerContext *ctx, + const kernel::KernelKey &desc, + const mindspore::lite::PrimitiveC *primitive) { + if (opParameter == nullptr) { + MS_LOG(ERROR) << "DropoutGrad opParameter nullptr."; + return nullptr; + } + if (desc.type != schema::PrimitiveType_DropoutGrad) { + MS_LOG(ERROR) << "DropoutGrad desc type should be " << schema::PrimitiveType_DropoutGrad << " got " << desc.type; + return nullptr; + } + auto *kernel = new (std::nothrow) DropoutGradCPUKernel(opParameter, inputs, outputs, ctx, primitive); + if (kernel == nullptr) { + MS_LOG(ERROR) << "DropoutGrad new kernel failed."; + return nullptr; + } + auto ret = kernel->Init(); + if (ret != RET_OK) { + delete kernel; + MS_LOG(ERROR) << "Init kernel failed, name: " << opParameter->name_ << ", type: " << schema::EnumNamePrimitiveType(static_cast<schema::PrimitiveType>(opParameter->type_)); + return nullptr; + } + return kernel; +} + +REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_DropoutGrad, CpuDropoutGradFp32KernelCreator) +} // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.h b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.h new file mode 100644 index 0000000000..1740656dde --- /dev/null +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/dropout_grad.h @@ -0,0 +1,43 @@ +/** + * Copyright 2020 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ + +#include +#include "src/lite_kernel.h" + +namespace mindspore::kernel { +class DropoutGradCPUKernel : public LiteKernel { + public: + DropoutGradCPUKernel(OpParameter *parameter, const std::vector &inputs, + const std::vector &outputs, const lite::InnerContext *ctx, + const mindspore::lite::PrimitiveC *primitive) + : LiteKernel(parameter, inputs, outputs, ctx, primitive) {} + + ~DropoutGradCPUKernel() override = default; + + int Init() override; + int ReSize() override; + int Run() override; + int Execute(int task_id); + + private: + float scale_; +}; + +} // namespace mindspore::kernel + +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_GRAD_DROPOUT_GRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc index 6a8d7c55a9..762349084f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/softmax_cross_entropy_with_logits.cc @@ -41,8 +41,7 @@ void SoftmaxCrossEntropyWithLogitsCPUKernel::ForwardPostExecute(const float *lab float logit = -logf(logits[i * param_->number_of_classes_ + j] <= 0.0 ? eps : logits[i * param_->number_of_classes_ + j]); grads[i * param_->number_of_classes_ + j] = - (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]) / - param_->batch_size_; + (logits[i * param_->number_of_classes_ + j] - labels[i * param_->number_of_classes_ + j]); total_loss += labels[i * param_->number_of_classes_ + j] * logit; } } @@ -63,7 +62,7 @@ int SoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { auto labels = reinterpret_cast(in_tensors_.at(1)->MutableData()); float *out = reinterpret_cast(out_tensors_.at(0)->MutableData()); float *grads = NULL; - if (is_train() && out_tensors_.size() > 1) { + if (IsTrain() && out_tensors_.size() > 1) { grads = reinterpret_cast(out_tensors_.at(1)->MutableData()); } size_t data_size = in_tensors_.at(0)->ElementsNum(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc index 01135de869..1d3fb6feb2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32_grad/sparse_softmax_cross_entropy_with_logits.cc @@ -86,7 +86,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { auto labels = reinterpret_cast(in_tensors_.at(1)->data_c()); float *out = reinterpret_cast(out_tensors_.at(0)->data_c()); float *grads = NULL; - if (is_train() && out_tensors_.size() > 1) { + if (IsTrain() && out_tensors_.size() > 1) { grads = reinterpret_cast(out_tensors_.at(1)->MutableData()); } size_t data_size = in_tensors_.at(0)->ElementsNum(); @@ -99,7 +99,7 @@ int SparseSoftmaxCrossEntropyWithLogitsCPUKernel::Execute(int task_id) { std::fill(losses_, losses_ + data_size, 0.f); std::fill(sum_data_, sum_data_ + sm_params_.input_shape_[0], 0.f); Softmax(ins, losses_, sum_data_, &sm_params_); - if (is_train()) { + if (IsTrain()) { GradPostExecute(labels, losses_, grads, out); } else { ForwardPostExecute(labels, losses_, out); diff --git a/mindspore/lite/src/train/train_populate_parameter.cc 
b/mindspore/lite/src/train/train_populate_parameter.cc index c490b34a25..3d5f4c028e 100644 --- a/mindspore/lite/src/train/train_populate_parameter.cc +++ b/mindspore/lite/src/train/train_populate_parameter.cc @@ -36,6 +36,10 @@ #include "src/ops/bn_grad.h" #include "nnacl/fp32_grad/batch_norm.h" #include "src/ops/adam.h" +#include "nnacl/fp32_grad/dropout_parameter.h" +#include "src/ops/dropout.h" +#include "src/ops/dropout_grad.h" +#include "src/ops/arithmetic.h" #include "src/ops/oneslike.h" #include "src/ops/binary_cross_entropy.h" #include "src/ops/binary_cross_entropy_grad.h" @@ -399,10 +403,66 @@ OpParameter *PopulateBNGradParameter(const mindspore::lite::PrimitiveC *primitiv bnGrad_param->op_parameter_.type_ = primitive->Type(); auto bngrad = reinterpret_cast<mindspore::lite::BNGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); bnGrad_param->epsilon_ = bngrad->GetEps(); - bnGrad_param->momentum_ = 0.1; + bnGrad_param->momentum_ = bngrad->GetMomentum(); return reinterpret_cast<OpParameter *>(bnGrad_param); } +OpParameter *PopulateDropoutParameter(const mindspore::lite::PrimitiveC *primitive) { + DropoutParameter *dropout_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter))); + if (dropout_parameter == nullptr) { + MS_LOG(ERROR) << "malloc Dropout Parameter failed."; + return nullptr; + } + memset(dropout_parameter, 0, sizeof(DropoutParameter)); + dropout_parameter->op_parameter_.type_ = primitive->Type(); + auto param = reinterpret_cast<mindspore::lite::Dropout *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); + dropout_parameter->ratio_ = param->GetRatio(); + if (dropout_parameter->ratio_ < 0.f || dropout_parameter->ratio_ > 1.f) { + MS_LOG(ERROR) << "Dropout ratio must be between 0 and 1, got " << dropout_parameter->ratio_; + free(dropout_parameter); + return nullptr; + } + return reinterpret_cast<OpParameter *>(dropout_parameter); +} + +OpParameter *PopulateDropoutGradParameter(const mindspore::lite::PrimitiveC *primitive) { + DropoutParameter *dropoutGrad_parameter = reinterpret_cast<DropoutParameter *>(malloc(sizeof(DropoutParameter))); + if (dropoutGrad_parameter == nullptr) { + MS_LOG(ERROR) << "malloc Dropout Grad Parameter failed."; + return nullptr; + } + memset(dropoutGrad_parameter, 0, sizeof(DropoutParameter)); + dropoutGrad_parameter->op_parameter_.type_ = primitive->Type(); + auto param = reinterpret_cast<mindspore::lite::DropoutGrad *>(const_cast<mindspore::lite::PrimitiveC *>(primitive)); + dropoutGrad_parameter->ratio_ = param->GetRatio(); + if (dropoutGrad_parameter->ratio_ < 0.f || dropoutGrad_parameter->ratio_ > 1.f) { + MS_LOG(ERROR) << "Dropout Grad ratio must be between 0 and 1, got " << dropoutGrad_parameter->ratio_; + free(dropoutGrad_parameter); + return nullptr; + } + return reinterpret_cast<OpParameter *>(dropoutGrad_parameter); +} + +OpParameter *PopulateArithmeticGradParameter(const mindspore::lite::PrimitiveC *primitive) { + ArithmeticParameter *arithmetic_param = reinterpret_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter))); + if (arithmetic_param == nullptr) { + MS_LOG(ERROR) << "malloc ArithmeticParameter failed."; + return nullptr; + } + memset(arithmetic_param, 0, sizeof(ArithmeticParameter)); + arithmetic_param->op_parameter_.type_ = primitive->Type(); + arithmetic_param->broadcasting_ = ((lite::Arithmetic *)primitive)->Broadcasting(); + arithmetic_param->ndim_ = ((lite::Arithmetic *)primitive)->NDims(); + + auto tmp_shape = ((lite::Arithmetic *)primitive)->InShape0(); + memcpy(arithmetic_param->in_shape0_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); + tmp_shape = ((lite::Arithmetic *)primitive)->InShape1(); + memcpy(arithmetic_param->in_shape1_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); + tmp_shape = ((lite::Arithmetic
*)primitive)->OutputShape(); + memcpy(arithmetic_param->out_shape_, static_cast<void *>(tmp_shape.data()), tmp_shape.size() * sizeof(int)); + return reinterpret_cast<OpParameter *>(arithmetic_param); +} + void PopulateTrainParameters() { lite::Registry ApplyMomentumParameterRegistry(schema::PrimitiveType_ApplyMomentum, PopulateApplyMomentumParameter); lite::Registry BiasGradParameterRegistry(schema::PrimitiveType_BiasGrad, PopulateBiasGradParameter); @@ -430,6 +490,10 @@ void PopulateTrainParameters() { lite::Registry OnesLikeParameterRegistry(schema::PrimitiveType_OnesLike, DefaultPopulateParameter); lite::Registry UnsortedSegmentSumParameterRegistry(schema::PrimitiveType_UnsortedSegmentSum, DefaultPopulateParameter); + lite::Registry DropoutParameterRegistry(schema::PrimitiveType_Dropout, PopulateDropoutParameter); + lite::Registry DropGradParameterRegistry(schema::PrimitiveType_DropoutGrad, PopulateDropoutGradParameter); + lite::Registry MaximumGradParameterRegistry(schema::PrimitiveType_MaximumGrad, PopulateArithmeticGradParameter); + lite::Registry MinimumGradParameterRegistry(schema::PrimitiveType_MinimumGrad, PopulateArithmeticGradParameter); } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc index 34b427139f..1257089069 100644 --- a/mindspore/lite/src/train/train_session.cc +++ b/mindspore/lite/src/train/train_session.cc @@ -15,9 +15,12 @@ */ #include "src/train/train_session.h" +#include #include #include #include +#include +#include #include "include/errorcode.h" #include "include/train_model.h" #include "src/common/utils.h" @@ -98,6 +101,21 @@ int TrainSession::CompileTrainGraph(mindspore::lite::TrainModel *model) { for (auto inTensor : inputs_) inTensor->MutableData(); RestoreOps(restore); AllocWorkSpace(); + MarkOptimizedKernels(); + CompileTrainKernels(); + if (train_mode_) { + auto ret1 = Train(); + if (ret1 != RET_OK) { + MS_LOG(ERROR) << "failed to initialize network in train mode"; + return RET_ERROR; + } + } else { + auto ret1 = Eval(); + if (ret1 != RET_OK) { + MS_LOG(ERROR) << "failed to initialize network in eval mode"; + return RET_ERROR; + } + } return ret; } @@ -110,34 +128,67 @@ void *TrainSession::ExportToBuf(char *buf, size_t *len) const { return model_->E int TrainSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) { this->outputs_.clear(); - for (auto ms_tensors : output_node_map_) - for (auto ms_tensor : ms_tensors.second) this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor))); - if (train_mode_) return lite::LiteSession::RunGraph(before, after); + + // build the output tensor list + for (auto ms_tensors : output_node_map_) { + for (auto ms_tensor : ms_tensors.second) { + this->outputs_.push_back((static_cast<lite::Tensor *>(ms_tensor))); + } + } if (this->context_ == nullptr) { MS_LOG(ERROR) << "context is null"; return lite::RET_NULL_PTR; } + auto run_kernel = (train_mode_) ?
train_kernels_ : inference_kernels_; lite::Executor executor; if (before == nullptr && after == nullptr) { - return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get()); + return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get()); } else { - return executor.Run(this->inputs_, this->outputs_, inference_kernels_, this->context_->allocator.get(), before, - after); + return executor.Run(this->inputs_, this->outputs_, run_kernel, this->context_->allocator.get(), before, after); } } -void TrainSession::Train() { +int TrainSession::SaveToFile(const std::string &filename) const { + size_t fb_size = 0; + auto *buf = reinterpret_cast(ExportToBuf(nullptr, &fb_size)); + if (buf == NULL) { + MS_LOG(ERROR) << "Could not Export Trained model"; + return lite::RET_NULL_PTR; + } + std::ofstream ofs(filename); + if ((true != ofs.good()) || (true != ofs.is_open())) { + MS_LOG(ERROR) << "Could not open file \"" << filename << "\" for writing"; + free(buf); + return RET_ERROR; + } + + ofs.seekp(0, std::ios::beg); + ofs.write(buf, fb_size); + ofs.close(); + free(buf); + return chmod(filename.c_str(), S_IRUSR); +} + +int TrainSession::Train() { for (auto ori_kernel : kernels_) { MS_ASSERT(nullptr != ori_kernel); if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { - ori_kernel->train(); + auto ret = ori_kernel->Train(); + if (ret != RET_OK) { + MS_LOG(ERROR) << ori_kernel->name() << " failed to set train mode"; + return RET_ERROR; + } } else { auto sub_graph = reinterpret_cast(ori_kernel); MS_ASSERT(nullptr != sub_graph); for (auto kernel : sub_graph->nodes()) { MS_ASSERT(nullptr != kernel); - kernel->train(); + auto ret = kernel->Train(); + if (ret != RET_OK) { + MS_LOG(ERROR) << kernel->name() << " failed to set train mode"; + return RET_ERROR; + } } } } @@ -157,6 +208,7 @@ void TrainSession::Train() { } } } + return RET_OK; } void TrainSession::UpdateOutputMapByLossKernel(const kernel::LiteKernel *kernel) { @@ -190,17 +242,25 @@ void TrainSession::UpdateOutputMapByInKernel(const kernel::LiteKernel *kernel) { } } -void TrainSession::Eval() { +int TrainSession::Eval() { for (auto ori_kernel : kernels_) { MS_ASSERT(nullptr != ori_kernel); if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { - ori_kernel->eval(); + auto ret = ori_kernel->Eval(); + if (ret != RET_OK) { + MS_LOG(ERROR) << ori_kernel->name() << " failed to set eval mode"; + return RET_ERROR; + } } else { auto sub_graph = reinterpret_cast(ori_kernel); MS_ASSERT(nullptr != sub_graph); for (auto kernel : sub_graph->nodes()) { MS_ASSERT(nullptr != kernel); - kernel->eval(); + auto ret = kernel->Eval(); + if (ret != RET_OK) { + MS_LOG(ERROR) << kernel->name() << " failed to set eval mode"; + return RET_ERROR; + } } } } @@ -221,6 +281,7 @@ void TrainSession::Eval() { if (inference_kernels_.size() == 0) { BuildInferenceKernelsMap(); } + return RET_OK; } void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, std::vector *v) { @@ -234,24 +295,25 @@ void TrainSession::BuildInferenceKernelsRecursive(kernel::LiteKernel *kernel, st void TrainSession::BuildInferenceKernelsMap() { std::vector req_kernels; - for (auto ori_kernel : kernels_) { - if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { - if (IsLossKernel(ori_kernel)) { // For each loss in the system add backward tree - for (auto in_node : ori_kernel->in_kernels()) { + for (auto kernel : this->kernels_) { + if (kernel->subgraph_type() == kernel::kNotSubGraph) { + if 
(IsLossKernel(kernel)) { // For each loss in the system add backward tree + for (auto in_node : kernel->in_kernels()) { BuildInferenceKernelsRecursive(in_node, &req_kernels); } } } else { - auto sub_graph = reinterpret_cast(ori_kernel); - for (auto kernel : sub_graph->nodes()) { - if (IsLossKernel(kernel)) { // For each loss in the system add backward tree - for (auto in_node : kernel->in_kernels()) { + auto sub_graph = reinterpret_cast(kernel); + for (auto sb_kernel : sub_graph->nodes()) { + if (IsLossKernel(sb_kernel)) { // For each loss in the system add backward tree + for (auto in_node : sb_kernel->in_kernels()) { BuildInferenceKernelsRecursive(in_node, &req_kernels); } } } } } + inference_kernels_.clear(); for (auto ori_kernel : kernels_) { if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { @@ -272,10 +334,71 @@ void TrainSession::BuildInferenceKernelsMap() { } } -bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) { +void TrainSession::CompileTrainKernels() { + train_kernels_.clear(); + for (auto ori_kernel : kernels_) { + if (ori_kernel->subgraph_type() == kernel::kNotSubGraph) { + train_kernels_.push_back(ori_kernel); + } else { + auto sub_graph = reinterpret_cast(ori_kernel); + for (auto kernel : sub_graph->nodes()) { + train_kernels_.push_back(kernel); + } + } + } +} + +void TrainSession::MarkOptimizedKernels() { + std::vector ot; + for (auto kernel : this->kernels_) { + if (kernel->subgraph_type() == kernel::kNotSubGraph) { + if (IsOptimizer(kernel)) { + std::copy(kernel->in_tensors().begin(), kernel->in_tensors().end(), std::back_inserter(ot)); + } + } else { + auto sub_graph = reinterpret_cast(kernel); + for (auto sb_kernel : sub_graph->nodes()) { + if (IsOptimizer(sb_kernel)) { + std::copy(sb_kernel->in_tensors().begin(), sb_kernel->in_tensors().end(), std::back_inserter(ot)); + } + } + } + } + for (auto kernel : this->kernels_) { + if (kernel->subgraph_type() == kernel::kNotSubGraph) { + if (!IsOptimizer(kernel)) { + for (auto it : kernel->in_tensors()) { + if (std::find(ot.begin(), ot.end(), it) != ot.end()) { + kernel->SetTrainable(true); + break; + } + } + } + } else { + auto sub_graph = reinterpret_cast(kernel); + for (auto sb_kernel : sub_graph->nodes()) { + if (!IsOptimizer(sb_kernel)) { + for (auto it : sb_kernel->in_tensors()) { + if (std::find(ot.begin(), ot.end(), it) != ot.end()) { + sb_kernel->SetTrainable(true); + break; + } + } + } + } + } + } +} + +bool TrainSession::IsLossKernel(const kernel::LiteKernel *kernel) const { return (kernel->Type() == schema::PrimitiveType_SoftmaxCrossEntropy); } +bool TrainSession::IsOptimizer(kernel::LiteKernel *kernel) const { + return ((kernel->Type() == schema::PrimitiveType_Adam) || (kernel->Type() == schema::PrimitiveType_Sgd) || + (kernel->Type() == schema::PrimitiveType_ApplyMomentum)); +} + } // namespace lite session::TrainSession *session::TrainSession::CreateSession(lite::Context *context) { diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h index 6e676a3957..e11a8d92d3 100644 --- a/mindspore/lite/src/train/train_session.h +++ b/mindspore/lite/src/train/train_session.h @@ -55,9 +55,10 @@ class TrainSession : virtual public session::TrainSession, virtual public lite:: int CompileTrainGraph(lite::TrainModel *model) override; void *ExportToBuf(char *buf, size_t *len) const override; + int SaveToFile(const std::string &filename) const override; - void Train() override; - void Eval() override; + int Train() override; + int Eval() override; void 
BindThread(bool if_bind) override { return lite::LiteSession::BindThread(if_bind); } std::vector GetInputs() const override { return lite::LiteSession::GetInputs(); } @@ -84,16 +85,19 @@ class TrainSession : virtual public session::TrainSession, virtual public lite:: protected: void AllocWorkSpace(); - bool IsLossKernel(const kernel::LiteKernel *kernel); + bool IsLossKernel(const kernel::LiteKernel *kernel) const; + bool IsOptimizer(kernel::LiteKernel *kernel) const; + virtual void MarkOptimizedKernels(); virtual std::vector ReplaceOps(); virtual void RestoreOps(const std::vector &restore); virtual void BuildInferenceKernelsMap(); virtual void BuildInferenceKernelsRecursive(kernel::LiteKernel *ker, std::vector *req_kernels); - + virtual void CompileTrainKernels(); TrainModel *model_ = nullptr; std::unordered_map> orig_output_map_; std::unordered_map orig_output_tensor_map_; std::vector inference_kernels_; + std::vector train_kernels_; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/test/models_ms_train.cfg b/mindspore/lite/test/models_ms_train.cfg new file mode 100644 index 0000000000..60192579f4 --- /dev/null +++ b/mindspore/lite/test/models_ms_train.cfg @@ -0,0 +1,8 @@ +mini_alexnet +mobilenetv1 +mobilenetv2 +mobilenetv3 +lenet +effnet +effnet_tune +resnet diff --git a/mindspore/lite/test/run_net_train.sh b/mindspore/lite/test/run_net_train.sh new file mode 100755 index 0000000000..0a616457d2 --- /dev/null +++ b/mindspore/lite/test/run_net_train.sh @@ -0,0 +1,394 @@ +#!/bin/bash + +# Run Export on x86 platform and create output test files: +function Run_Export(){ + cd $models_path || exit 1 + if [[ -z "${CLOUD_MODEL_ZOO}" ]]; then + echo "CLOUD_MODEL_ZOO is not defined - exiting export models" + exit 1 + fi + # Export mindspore train models: + while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + echo ${model_name}'_train_export.py' >> "${export_log_file}" + echo 'exporting' ${model_name} + echo 'docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python '${models_path}'/'${model_name}'_train_export.py' >> "${export_log_file}" + docker run --user $(id -u):$(id -g) --env CLOUD_MODEL_ZOO=${CLOUD_MODEL_ZOO} -w $PWD --runtime=nvidia -v /home/$USER:/home/$USER -v /opt/share:/opt/share --privileged=true mindspore_dev:5 python ${models_path}'/'${model_name}_train_export.py + if [ $? 
= 0 ]; then + export_result='export mindspore '${model_name}'_train_export pass';echo ${export_result} >> ${export_result_file} + else + export_result='export mindspore '${model_name}'_train_export failed';echo ${export_result} >> ${export_result_file} + fi + done < ${models_mindspore_train_config} +} + +# Run converter on x86 platform: +function Run_Converter() { + # Unzip x86 runtime and convertor + cd ${x86_path} || exit 1 + tar -zxf mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train.tar.gz || exit 1 + + tar -zxf mindspore-lite-${version}-converter-ubuntu-train.tar.gz || exit 1 + cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu-train || exit 1 + cp converter/converter_lite ./ || exit 1 + + + # Convert the models + cd ${x86_path}/mindspore-lite-${version}-converter-ubuntu-train || exit 1 + + rm -rf ${ms_models_path} + mkdir -p ${ms_models_path} + + # Convert mindspore train models: + while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + echo ${model_name}'_train' >> "${run_converter_log_file}" + echo './converter_lite --fmk=MINDIR --modelFile='${models_path}'/'${model_name}'_train.mindir --outputFile='${ms_models_path}'/'${model_name}'_train --trainModel=true' >> "${run_converter_log_file}" + LD_LIBRARY_PATH=./lib/:./third_party/protobuf/lib:./third_party/flatbuffers/lib:./third_party/glog/lib \ + ./converter_lite --fmk=MINDIR --modelFile=${models_path}/${model_name}_train.mindir \ + --outputFile=${ms_models_path}/${model_name}'_train' \ + --trainModel=true + if [ $? = 0 ]; then + converter_result='converter mindspore '${model_name}'_train pass';echo ${converter_result} >> ${run_converter_result_file} + else + converter_result='converter mindspore '${model_name}'_train failed';echo ${converter_result} >> ${run_converter_result_file} + fi + done < ${models_mindspore_train_config} +} + +# Run on x86 platform: +function Run_x86() { + # Run mindspore converted train models: + while read line; do + model_name=${line} + if [[ $model_name == \#* ]]; then + continue + fi + + echo ${model_name}'_train' >> "${run_x86_log_file}" + echo 'cd '${x86_path}'/mindspore-lite-'${version}'-runtime-x86-'${process_unit_x86}-train >> "${run_x86_log_file}" + cd ${x86_path}/mindspore-lite-${version}-runtime-x86-${process_unit_x86}-train || return 1 + echo 'LD_LIBRARY_PATH='${LD_LIBRARY_PATH}':./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib;./net_train/net_train --modelFile='${ms_models_path}'/'${model_name}'_train.ms --inDataFile='${input_path}'/'${model_name}'_input1.bin,'${train_io_path}'/'${model_name}'_input2.bin --expectedDataFile='${train_io_path}'/'${model_name}'_outputs.bin --exportFile='${ms_models_path}'/'${model_name}'_train_exported.ms' >> "${run_x86_log_file}" + echo '-------------------------------------------------------------------------------' >> "${run_x86_log_file}" + LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:./lib:./third_party/libjpeg-turbo/lib:./third_party/opencv/lib \ + ${run_valgrind}./net_train/net_train \ + --modelFile=${ms_models_path}/${model_name}_train.ms \ + --inDataFile=${train_io_path}/${model_name}_input1.bin,${train_io_path}/${model_name}_input2.bin \ + --expectedDataFile=${train_io_path}/${model_name}_outputs.bin \ + --exportFile=${ms_models_path}/${model_name}_train_exported.ms >> "${run_x86_log_file}" + if [ $? 
+# Run on arm platform:
+# Gets a parameter - arm64/arm32
+function Run_arm() {
+    if [ "$1" == arm64 ]; then
+        arm_path=${arm64_path}
+        process_unit=${process_unit_arm64}
+        version_arm=${version_arm64}
+        run_arm_log_file=${run_arm64_log_file}
+        adb_cmd_run_file=${adb_cmd_arm64_run_file}
+        adb_push_log_file=${adb_push_arm64_log_file}
+        adb_cmd_file=${adb_cmd_arm64_file}
+    elif [ "$1" == arm32 ]; then
+        arm_path=${arm32_path}
+        process_unit=${process_unit_arm32}
+        version_arm=${version_arm32}
+        run_arm_log_file=${run_arm32_log_file}
+        adb_cmd_run_file=${adb_cmd_arm32_run_file}
+        adb_push_log_file=${adb_push_arm32_log_file}
+        adb_cmd_file=${adb_cmd_arm32_file}
+    else
+        echo 'type '$1' is not supported'
+        exit 1
+    fi
+    arm_type=$1
+
+    # Unzip
+    cd ${arm_path} || exit 1
+    tar -zxf mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train.tar.gz || exit 1
+
+    # If built with minddata, copy the minddata-related libs
+    cd ${net_train_test_path} || exit 1
+    if [ -f ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ]; then
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libjpeg.so ${net_train_test_path}/libjpeg.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/libjpeg-turbo/lib/libturbojpeg.so ${net_train_test_path}/libturbojpeg.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_core.so ${net_train_test_path}/libopencv_core.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgcodecs.so ${net_train_test_path}/libopencv_imgcodecs.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/third_party/opencv/lib/libopencv_imgproc.so ${net_train_test_path}/libopencv_imgproc.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libminddata-lite.so ${net_train_test_path}/libminddata-lite.so || exit 1
+    fi
+
+    cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite.so ${net_train_test_path}/libmindspore-lite.so || exit 1
+    if [ "$1" == arm64 ]; then
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-fp16.so ${net_train_test_path}/libmindspore-lite-fp16.so || exit 1
+        cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/lib/libmindspore-lite-optimize.so ${net_train_test_path}/libmindspore-lite-optimize.so || exit 1
+    fi
+    cp -a ${arm_path}/mindspore-lite-${version_arm}-runtime-${arm_type}-${process_unit}-train/net_train/net_train ${net_train_test_path}/net_train || exit 1
+
+    # adb push all needed files to the phone
+    adb -s ${device_id} push ${net_train_test_path} /data/local/tmp/ > ${adb_push_log_file}
+
+    # run adb, run session, check the result:
+    echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_file}
+    echo 'chmod 777 net_train' >> ${adb_cmd_file}
+
+    adb -s ${device_id} shell < ${adb_cmd_file}
+
+    # Run mindir converted train models:
+    while read line; do
+        model_name=${line}
+        if [[ $model_name == \#* ]]; then
+            continue
+        fi
+
+        # run net_train test without calib data
+        echo ${model_name}'_train' >> "${run_arm_log_file}"
+        adb -s ${device_id} push ${train_io_path}/${model_name}_input*.bin ${train_io_path}/${model_name}_outputs.bin /data/local/tmp/net_train_test >> ${adb_push_log_file}
+        echo 'cd /data/local/tmp/net_train_test' > ${adb_cmd_run_file}
+        if [ "$1" == arm64 ]; then
+            echo 'export LD_LIBRARY_PATH=/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${run_arm_log_file}"
+            echo 'export LD_LIBRARY_PATH=/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${adb_cmd_run_file}"
+        elif [ "$1" == arm32 ]; then
+            echo 'export LD_LIBRARY_PATH=/data/local/tmp/:/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${run_arm_log_file}"
+            echo 'export LD_LIBRARY_PATH=/data/local/tmp/:/data/local/tmp/net_train_test;./net_train --modelFile='${model_name}'_train.ms --inDataFile=/data/local/tmp/net_train_test/'${model_name}'_input1.bin,/data/local/tmp/net_train_test/'${model_name}'_input2.bin --expectedDataFile=/data/local/tmp/net_train_test/'${model_name}'_outputs.bin --exportFile='${model_name}'_train_exported.ms' >> "${adb_cmd_run_file}"
+        fi
+
+        adb -s ${device_id} shell < ${adb_cmd_run_file} >> ${run_arm_log_file}
+        if [ $? = 0 ]; then
+            run_result=${arm_type}': '${model_name}'_train pass'; echo ${run_result} >> ${run_net_train_result_file}
+        else
+            run_result=${arm_type}': '${model_name}'_train failed'; echo ${run_result} >> ${run_net_train_result_file}
+        fi
+    done < ${models_mindspore_train_config}
+}
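+# Per model, the device-side run above boils down to a push-then-run pair
+# (model name and working directory shown here are illustrative placeholders):
+#   adb -s ${device_id} push lenet_train_input1.bin lenet_train_input2.bin \
+#       lenet_train_outputs.bin /data/local/tmp/net_train_test
+#   adb -s ${device_id} shell 'cd /data/local/tmp/net_train_test && \
+#       export LD_LIBRARY_PATH=/data/local/tmp/net_train_test && \
+#       ./net_train --modelFile=lenet_train.ms \
+#       --inDataFile=lenet_train_input1.bin,lenet_train_input2.bin \
+#       --expectedDataFile=lenet_train_outputs.bin \
+#       --exportFile=lenet_train_exported.ms'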
+# Print start msg before run testcase
+function MS_PRINT_TESTCASE_START_MSG() {
+    echo ""
+    echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
+    echo -e "env Testcase Result "
+    echo -e "--- -------- ------ "
+}
+
+# Print end msg after run testcase
+function MS_PRINT_TESTCASE_END_MSG() {
+    echo -e "-----------------------------------------------------------------------------------------------------------------------------------"
+}
+
+function Print_Result() {
+    MS_PRINT_TESTCASE_END_MSG
+    while read line; do
+        arr=("${line}")
+        printf "%-15s %-20s %-90s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]} ${arr[3]}
+    done < $1
+    MS_PRINT_TESTCASE_END_MSG
+}
+
+basepath=$(pwd)
+echo ${basepath}
+
+# Example: run_net_train.sh -r /home/emir/Work/TestingEnv/release -m /home/emir/Work/TestingEnv/train_models -i /home/emir/Work/TestingEnv/train_io -d "8KE5T19620002408"
+# Optional: -e 1 re-exports the models, -v runs the x86 tests under valgrind
+while getopts "r:m:d:i:e:v" opt; do
+    case ${opt} in
+        r)
+            release_path=${OPTARG}
+            echo "release_path is ${OPTARG}"
+            ;;
+        m)
+            models_path=${OPTARG}
+            echo "models_path is ${OPTARG}"
+            ;;
+        i)
+            train_io_path=${OPTARG}
+            echo "train_io_path is ${OPTARG}"
+            ;;
+        d)
+            device_id=${OPTARG}
+            echo "device_id is ${OPTARG}"
+            ;;
+        e)
+            enable_export=${OPTARG}
+            echo "enable_export = ${OPTARG}"
+            ;;
+        v)
+            run_valgrind="valgrind "
+            echo "Run x86 with valgrind"
+            ;;
+        ?)
+            echo "unknown parameter"
+            exit 1;;
+    esac
+done
+
+arm64_path=${release_path}/android_aarch64
+file=$(ls ${arm64_path}/*runtime-arm64*train.tar.gz)
+file_name="${file##*/}"
+IFS="-" read -r -a file_name_array <<< "$file_name"
+version_arm64=${file_name_array[2]}
+process_unit_arm64=${file_name_array[5]}
+
+arm32_path=${release_path}/android_aarch32
+file=$(ls ${arm32_path}/*runtime-arm32*train.tar.gz)
+file_name="${file##*/}"
+IFS="-" read -r -a file_name_array <<< "$file_name"
+version_arm32=${file_name_array[2]}
+process_unit_arm32=${file_name_array[5]}
+
+x86_path=${release_path}/ubuntu_x86
+file=$(ls ${x86_path}/*runtime-x86*train.tar.gz)
+file_name="${file##*/}"
+IFS="-" read -r -a file_name_array <<< "$file_name"
+version=${file_name_array[2]}
+process_unit_x86=${file_name_array[5]}
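+# For example, a tarball named mindspore-lite-1.0.0-runtime-x86-cpu-train.tar.gz
+# (an assumed name, not a real release) splits on '-' into:
+#   [0]=mindspore [1]=lite [2]=1.0.0 [3]=runtime [4]=x86 [5]=cpu [6]=train.tar.gz
+# which is why index 2 yields the version and index 5 the process unit.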
+# Set models config filepath
+models_mindspore_train_config=${basepath}/models_ms_train.cfg
+
+ms_models_path=${models_path}/ms_models
+
+logs_path=${models_path}/logs
+rm -rf ${logs_path}
+mkdir -p ${logs_path}
+
+# Export model if enabled
+if [[ $enable_export == 1 ]]; then
+    echo "Start Exporting models ..."
+    # Write export result to temp file
+    export_log_file=${logs_path}/export_log.txt
+    echo ' ' > ${export_log_file}
+
+    export_result_file=${logs_path}/export_result.txt
+    echo ' ' > ${export_result_file}
+    # Run export
+    Run_Export
+    Print_Result ${export_result_file}
+fi
+
+# Write converter result to temp file
+run_converter_log_file=${logs_path}/run_converter_log.txt
+echo ' ' > ${run_converter_log_file}
+
+run_converter_result_file=${logs_path}/run_converter_result.txt
+echo ' ' > ${run_converter_result_file}
+
+START=$(date +%s.%N)
+
+# Run converter (in the background, so that $! captures its PID)
+echo "start run converter ..."
+Run_Converter &
+Run_converter_PID=$!
+sleep 1
+
+wait ${Run_converter_PID}
+Run_converter_status=$?
+
+# Check converter result and return value
+if [[ ${Run_converter_status} = 0 ]];then
+    echo "Run converter success"
+    Print_Result ${run_converter_result_file}
+else
+    echo "Run converter failed"
+    cat ${run_converter_log_file}
+    Print_Result ${run_converter_result_file}
+    exit 1
+fi
+
+# Write net_train result to temp file
+run_net_train_result_file=${logs_path}/run_net_train_result.txt
+echo ' ' > ${run_net_train_result_file}
+
+# Create log files
+run_x86_log_file=${logs_path}/run_x86_log.txt
+echo 'run x86 logs: ' > ${run_x86_log_file}
+
+run_arm64_log_file=${logs_path}/run_arm64_log.txt
+echo 'run arm64 logs: ' > ${run_arm64_log_file}
+adb_push_arm64_log_file=${logs_path}/adb_push_arm64_log.txt
+adb_cmd_arm64_file=${logs_path}/adb_arm64_cmd.txt
+adb_cmd_arm64_run_file=${logs_path}/adb_arm64_cmd_run.txt
+
+run_arm32_log_file=${logs_path}/run_arm32_log.txt
+echo 'run arm32 logs: ' > ${run_arm32_log_file}
+adb_push_arm32_log_file=${logs_path}/adb_push_arm32_log.txt
+adb_cmd_arm32_file=${logs_path}/adb_arm32_cmd.txt
+adb_cmd_arm32_run_file=${logs_path}/adb_arm32_cmd_run.txt
+
+# Copy the MindSpore models:
+echo "Push files to net_train_test folder and run net_train"
+net_train_test_path=${models_path}/net_train_test
+rm -rf ${net_train_test_path}
+mkdir -p ${net_train_test_path}
+cp -a ${ms_models_path}/*.ms ${net_train_test_path} || exit 1
+
+# Run on x86
+echo "start Run x86 ..."
+Run_x86 &
+Run_x86_PID=$!
+sleep 1
+
+wait ${Run_x86_PID}
+Run_x86_status=$?
+
+# Run on arm64
+echo "start Run arm64 ..."
+Run_arm arm64
+Run_arm64_status=$?
+sleep 3
+
+# Run on arm32
+echo "start Run arm32 ..."
+Run_arm arm32
+Run_arm32_status=$?
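+# The pattern used above: a job launched with a trailing '&' runs in the
+# background, '$!' records its PID, and 'wait <pid>' blocks until it exits and
+# propagates its exit code into '$?'. A minimal sketch:
+#   Run_x86 &
+#   pid=$!
+#   wait ${pid}
+#   status=$?    # exit code of Run_x86
+# The arm runs are synchronous, so their status is read from '$?' directly.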
+sleep 1 + +END=$(date +%s.%N) +DIFF=$(echo "$END - $START" | bc) + +function Print_Benchmark_Result() { + MS_PRINT_TESTCASE_START_MSG + while read line; do + arr=("${line}") + printf "%-20s %-100s %-7s\n" ${arr[0]} ${arr[1]} ${arr[2]} + done < ${run_net_train_result_file} + MS_PRINT_TESTCASE_END_MSG +} + +# Check net_train result and return value +if [[ ${Run_x86_status} != 0 ]];then + echo "Run_x86 failed" + cat ${run_x86_log_file} +fi + +if [[ ${Run_arm64_status} != 0 ]];then + echo "Run_arm64 failed" + cat ${run_arm64_log_file} +fi + +if [[ ${Run_arm32_status} != 0 ]];then + echo "Run_arm32 failed" + cat ${run_arm32_log_file} +fi + +echo "Test ended - Results:" +Print_Benchmark_Result +echo "Test run Time:" $DIFF +exit 0 diff --git a/mindspore/lite/test/run_train_ut.sh b/mindspore/lite/test/run_train_ut.sh index 669373b950..6bcccf4d17 100755 --- a/mindspore/lite/test/run_train_ut.sh +++ b/mindspore/lite/test/run_train_ut.sh @@ -1,5 +1,5 @@ #!/bin/bash cd ./ut/src/runtime/kernel/arm || exit 1 ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.efficient_net -../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer -../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet \ No newline at end of file +# ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.tuning_layer +# ../../../../../../build/test/lite-test --gtest_filter=NetworkTest.lenetnet \ No newline at end of file diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc index f5863fffc7..2f52e3ff18 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/activation_grad_fp32_tests.cc @@ -42,11 +42,17 @@ TEST_F(TestActGradFp32, ReluGradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/relu_y_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); EXPECT_EQ(input_size, output_data_size * sizeof(float)); + std::string yt_path = "./test_data/activationGrad/relu_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); EXPECT_EQ(input_size, output_data_size * sizeof(float)); + auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { ReluGrad(yt_data, input_data, output_data_size, output_data); @@ -90,10 +96,15 @@ TEST_F(TestActGradFp32, Relu6GradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/relu6_y_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); + std::string yt_path = "./test_data/activationGrad/relu6_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { Relu6Grad(yt_data, input_data, 50, output_data); @@ -136,10 +147,15 @@ TEST_F(TestActGradFp32, LReluGradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/lrelu_y_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + 
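+  // ReadFile returns nullptr on failure, so every loaded buffer is checked
+  // before it is dereferenced.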
ASSERT_NE(input_data, nullptr); + std::string yt_path = "./test_data/activationGrad/lrelu_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { LReluGrad(yt_data, input_data, 50, output_data, 0.1); @@ -182,10 +198,15 @@ TEST_F(TestActGradFp32, SigmoidGradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/sigmoid_y_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); + std::string yt_path = "./test_data/activationGrad/sigmoid_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { SigmoidGrad(yt_data, input_data, 50, output_data); @@ -229,10 +250,15 @@ TEST_F(TestActGradFp32, tanhGradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/tanh_y_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); + std::string yt_path = "./test_data/activationGrad/tanh_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { TanhGrad(yt_data, input_data, 50, output_data); @@ -274,11 +300,17 @@ TEST_F(TestActGradFp32, hswishGradFp32) { size_t input_size; std::string input_path = "./test_data/activationGrad/hswish_x_50.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); EXPECT_EQ(input_size, output_data_size * sizeof(float)); + std::string yt_path = "./test_data/activationGrad/hswish_yt_50.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); EXPECT_EQ(input_size, output_data_size * sizeof(float)); + auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + // warm up loop for (int i = 0; i < 3; i++) { HSwishGrad(yt_data, input_data, static_cast(output_data_size), output_data); @@ -311,4 +343,58 @@ TEST_F(TestActGradFp32, hswishGradFp32) { delete[] yt_data; MS_LOG(INFO) << "hswishGradFp32 passed"; } + +TEST_F(TestActGradFp32, hsigmoidGradFp32) { + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; + const size_t output_data_size = 10; + + size_t input_size; + std::string input_path = "./test_data/activationGrad/hsig_x_50.bin"; + auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); + EXPECT_EQ(input_size, output_data_size * sizeof(float)); + + std::string yt_path = "./test_data/activationGrad/hsig_yt_50.bin"; + auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); + EXPECT_EQ(input_size, output_data_size * sizeof(float)); + + auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); + + // warm up loop + for (int i = 0; i < 3; i++) { + HSigmoidGrad(yt_data, input_data, 
static_cast(output_data_size), output_data); + } + + int loop_count = 100; + auto time_start = mindspore::lite::GetTimeUs(); + for (int i = 0; i < loop_count; i++) { + HSigmoidGrad(yt_data, input_data, output_data_size, output_data); + } + auto time_end = mindspore::lite::GetTimeUs(); + auto cost = time_end - time_start; + time_avg = cost / loop_count; + printf("single thread running time : %f ms\n", time_avg / 1000.0f); + + printf("==================output data=================\n"); + size_t min = (output_data_size < 20UL) ? output_data_size : 20UL; + for (size_t i = 0; i < min; i++) { + std::cout << output_data[i] << " ,"; + } + std::cout << std::endl; + + std::string output_path = "./test_data/activationGrad/hsig_out_50.bin"; + int res = CompareRelativeOutput(output_data, output_path); + + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] output_data; + delete[] yt_data; + MS_LOG(INFO) << "hsigmoidGradFp32 passed"; +} + } // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc index 1b98f1db55..788fb7971c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/arithmetic_grad_fp32_tests.cc @@ -24,9 +24,9 @@ #include "src/kernel_registry.h" #include "src/ops/arithmetic_grad.h" -#ifdef PRIMITIVE_WRITEABLE namespace mindspore { +#ifdef PRIMITIVE_WRITEABLE ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveType type, std::vector inputs, std::vector outputs) { @@ -37,6 +37,12 @@ ArithmeticParameter *PopulateArithmeticParameter(mindspore::schema::PrimitiveTyp } arithmetic_param->op_parameter_.type_ = type; schema::PrimitiveT *prim = new schema::PrimitiveT; + if (prim == nullptr) { + free(arithmetic_param); + MS_LOG(ERROR) << "new PrimitiveT failed."; + return nullptr; + } + prim->value.type = type; auto agrad = mindspore::lite::ArithmeticGrad(prim); agrad.InferShape(inputs, outputs); @@ -55,6 +61,7 @@ class TestArithmeticGradFp32 : public mindspore::CommonTest { std::vector GenerateTensorsForTest(const char *test, int test_id) { size_t input_size; + std::vector ret_vector; std::vector large_dim({4, 6}); std::vector small_dim({6}); int large_size = (4 * 6); @@ -80,36 +87,127 @@ std::vector GenerateTensorsForTest(const char *test, int test_id } auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(test, &input_size)); + if (dy_data == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + return ret_vector; + } lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + if (dy_tensor == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + return ret_vector; + } dy_tensor->set_data(dy_data); auto x1_data = reinterpret_cast(mindspore::lite::ReadFile(dx1_file, &input_size)); + if (x1_data == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + return ret_vector; + } lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + if (x1_tensor == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + return ret_vector; + } x1_tensor->set_data(x1_data); auto x2_data = reinterpret_cast(mindspore::lite::ReadFile(dx2_file, &input_size)); + if (x2_data == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + 
delete[] x1_data; + delete x1_tensor; + return ret_vector; + } lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + if (x2_tensor == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + delete x1_tensor; + delete[] x2_data; + return ret_vector; + } x2_tensor->set_data(x2_data); auto dx1_data = new float[large_size]; + if (dx1_data == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + delete x1_tensor; + delete[] x2_data; + delete x2_tensor; + return ret_vector; + } lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + if (dx1_tensor == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + delete x1_tensor; + delete[] x2_data; + delete x2_tensor; + delete[] dx1_data; + return ret_vector; + } dx1_tensor->set_data(dx1_data); auto dx2_data = new float[small_size]; + if (dx2_data == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + delete x1_tensor; + delete[] x2_data; + delete x2_tensor; + delete[] dx1_data; + delete dx1_tensor; + return ret_vector; + } lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + if (dx2_tensor == nullptr) { + MS_LOG(ERROR) << "new operator failed"; + delete[] dy_data; + delete dy_tensor; + delete[] x1_data; + delete x1_tensor; + delete[] x2_data; + delete x2_tensor; + delete[] dx1_data; + delete dx1_tensor; + delete[] dx2_data; + return ret_vector; + } dx2_tensor->set_data(dx2_data); - std::vector ret_vector = {dy_tensor, x1_tensor, x2_tensor, dx1_tensor, dx2_tensor}; + ret_vector.push_back(dy_tensor); + ret_vector.push_back(x1_tensor); + ret_vector.push_back(x2_tensor); + ret_vector.push_back(dx1_tensor); + ret_vector.push_back(dx2_tensor); + return ret_vector; } TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); - + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -117,7 +215,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -144,10 +244,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGradFp32) { TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_1_dy_4_6.bin", 1); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -155,7 +257,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) 
{ kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -184,10 +288,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad2Fp32) { TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_8_dy_5_4_6.bin", 8); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_AddGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -195,7 +301,9 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_AddGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -225,10 +333,12 @@ TEST_F(TestArithmeticGradFp32, TestAddGrad3Fp32) { TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_2_dy_4_6.bin", 2); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -236,7 +346,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -266,10 +378,12 @@ TEST_F(TestArithmeticGradFp32, TestSubGradFp32) { TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_3_dy_4_6.bin", 3); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_SubGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -277,7 +391,9 @@ TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SubGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -305,10 +421,12 @@ 
TEST_F(TestArithmeticGradFp32, TestSubGrad2Fp32) { TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -316,8 +434,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); - + ASSERT_NE(kernel_obj, nullptr); int loop_count = 1000; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { @@ -354,10 +473,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGradFp32) { TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_4_dy_4_6.bin", 4); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -365,7 +486,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -394,10 +517,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad2Fp32) { TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -405,7 +530,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -434,10 +561,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad3Fp32) { TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_9_dy_5_4_6.bin", 9); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = 
PopulateArithmeticParameter(schema::PrimitiveType_MulGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -445,7 +574,9 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_MulGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -474,10 +605,12 @@ TEST_F(TestArithmeticGradFp32, TestMulGrad4Fp32) { TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_5_dy_4_6.bin", 5); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -485,7 +618,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -514,10 +649,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGradFp32) { TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_6_dy_4_6.bin", 6); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[2], all_tensors[1]}; std::vector outputs = {all_tensors[4], all_tensors[3]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -525,7 +662,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[0]->MutableData()); @@ -555,10 +694,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad2Fp32) { TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_10_dy_5_4_6.bin", 10); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -566,7 +707,9 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj 
= creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -595,10 +738,12 @@ TEST_F(TestArithmeticGradFp32, TestDivGrad3Fp32) { TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { std::vector all_tensors = GenerateTensorsForTest("./test_data/operators/arithmetic_fp32_7_dy_4_5_6.bin", 7); + ASSERT_NE(all_tensors.size(), 0); std::vector inputs = {all_tensors[0], all_tensors[1], all_tensors[2]}; std::vector outputs = {all_tensors[3], all_tensors[4]}; auto param = PopulateArithmeticParameter(schema::PrimitiveType_DivGrad, inputs, outputs); + ASSERT_NE(param, nullptr); lite::InnerContext ctx; ctx.thread_num_ = 1; @@ -606,7 +751,9 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DivGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); @@ -630,6 +777,91 @@ TEST_F(TestArithmeticGradFp32, Test3DDivGrad2Fp32) { MS_LOG(INFO) << "TestDivGrad2Fp32 passed"; } -} // namespace mindspore +TEST_F(TestArithmeticGradFp32, TestMaximumGradBroadcastFp32) { + std::vector large_dim({4, 6}); + std::vector small_dim({6}); + + large_dim = std::vector({1, 2, 3}); + small_dim = std::vector({1, 3}); + int large_size = (2 * 3); + int small_size = 3; + size_t input_size; + char *dx1_file = const_cast("./test_data/operators/x1_maximum.bin"); + char *dx2_file = const_cast("./test_data/operators/x2_maximum.bin"); + + std::string yt_path = "./test_data/operators/yt_maximum.bin"; + auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(dy_data, nullptr); + EXPECT_EQ(input_size, large_size * sizeof(float)); + lite::Tensor *dy_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + ASSERT_NE(dy_tensor, nullptr); + dy_tensor->set_data(dy_data); + + auto x1_data = reinterpret_cast(mindspore::lite::ReadFile(dx1_file, &input_size)); + ASSERT_NE(x1_data, nullptr); + lite::Tensor *x1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + ASSERT_NE(x1_tensor, nullptr); + x1_tensor->set_data(x1_data); + + auto x2_data = reinterpret_cast(mindspore::lite::ReadFile(dx2_file, &input_size)); + ASSERT_NE(x2_data, nullptr); + lite::Tensor *x2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + ASSERT_NE(x2_tensor, nullptr); + x2_tensor->set_data(x2_data); + + auto dx1_data = new float[small_size]; + ASSERT_NE(dx1_data, nullptr); + lite::Tensor *dx1_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, small_dim); + ASSERT_NE(dx1_tensor, nullptr); + dx1_tensor->set_data(dx1_data); + auto dx2_data = new float[large_size]; + ASSERT_NE(dx2_data, nullptr); + lite::Tensor *dx2_tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, large_dim); + ASSERT_NE(dx2_tensor, nullptr); + dx2_tensor->set_data(dx2_data); + + std::vector inputs = {x1_tensor, x2_tensor, dy_tensor}; + std::vector outputs = {dx1_tensor, dx2_tensor}; + + auto param = PopulateArithmeticParameter(schema::PrimitiveType_MaximumGrad, inputs, outputs); + ASSERT_NE(param, nullptr); + + lite::InnerContext ctx; + ctx.thread_num_ = 1; + ASSERT_EQ(lite::RET_OK, ctx.Init()); + + kernel::KernelKey desc = {kernel::kCPU, 
TypeId::kNumberTypeFloat32, schema::PrimitiveType_MaximumGrad}; + auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); + auto kernel_obj = creator(inputs, outputs, reinterpret_cast(param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); + kernel_obj->Run(); + + float *output_ptr = reinterpret_cast(outputs[1]->MutableData()); + printf("==================output data=================\n"); + for (int i = 0; i < 6; i++) { + std::cout << output_ptr[i] << " ,"; + } + std::cout << std::endl; + + std::string dx1_path = "./test_data/operators/x1_grad_maximum.bin"; + EXPECT_EQ(0, CompareRelativeOutput(reinterpret_cast(outputs[0]->MutableData()), dx1_path)); + + std::string output_path = "./test_data/operators/x2_grad_maximum.bin"; + EXPECT_EQ(0, CompareRelativeOutput(output_ptr, output_path)); + for (auto tensor : inputs) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->set_data(nullptr); + delete tensor; + } + for (auto tensor : outputs) { + delete[] reinterpret_cast(tensor->MutableData()); + tensor->set_data(nullptr); + delete tensor; + } + delete kernel_obj; + MS_LOG(INFO) << "TestMaximumGradBroadcastFp32 passed"; +} #endif +} // namespace mindspore diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc index 03659e39ee..b0d94b9d2c 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bias_grad_fp32_tests.cc @@ -31,15 +31,20 @@ class TestBiasGradFp32 : public mindspore::CommonTest { TEST_F(TestBiasGradFp32, BiasGradFp32) { // prepare stage ArithmeticParameter *bias_param = static_cast(malloc(sizeof(ArithmeticParameter))); + ASSERT_NE(bias_param, nullptr); + size_t input_size; std::string input_path = "./test_data/operators/biasgradfp32_1_dy_10_28_28_7.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); + std::vector dim_dy({10, 28, 28, 7}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(input_data); std::vector inputs = {&dy_tensor}; auto output_data = new float[7]; + ASSERT_NE(output_data, nullptr); std::vector dim_dw = {7}; lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(output_data); @@ -51,8 +56,9 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bias_param), &ctx, desc, nullptr); - + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); printf("==================output data=================\n"); @@ -61,7 +67,57 @@ TEST_F(TestBiasGradFp32, BiasGradFp32) { } std::cout << std::endl; std::string output_path = "./test_data/operators/biasgradfp32_1_db_7.bin"; - CompareOutput(output_data, 7, output_path); + auto res = CompareRelativeOutput(output_data, output_path); + EXPECT_EQ(res, 0); + + delete[] input_data; + delete[] output_data; + // delete bias_param; + dy_tensor.set_data(nullptr); + dw_tensor.set_data(nullptr); + delete kernel_obj; + MS_LOG(INFO) << "BiasGradFp32 passed"; +} + +TEST_F(TestBiasGradFp32, BiasGrad2DFp32) { + // prepare stage + ArithmeticParameter *bias_param = 
static_cast<ArithmeticParameter *>(malloc(sizeof(ArithmeticParameter)));
+  ASSERT_NE(bias_param, nullptr);
+
+  size_t input_size;
+  std::string input_path = "./test_data/operators/fc_yt.f32";
+  auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(input_path.c_str(), &input_size));
+  ASSERT_NE(input_data, nullptr);
+  std::vector<int> dim_dy({2, 20});
+  lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy);
+  dy_tensor.set_data(input_data);
+
+  std::vector<lite::Tensor *> inputs = {&dy_tensor};
+  auto output_data = new float[20];
+  ASSERT_NE(output_data, nullptr);
+  std::vector<int> dim_dw = {20};
+  lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw);
+  dw_tensor.set_data(output_data);
+  std::vector<lite::Tensor *> outputs = {&dw_tensor};
+
+  lite::InnerContext ctx;
+  ctx.thread_num_ = 1;
+  ASSERT_EQ(lite::RET_OK, ctx.Init());
+
+  kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BiasGrad};
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  ASSERT_NE(creator, nullptr);
+  auto kernel_obj = creator(inputs, outputs, reinterpret_cast<OpParameter *>(bias_param), &ctx, desc, nullptr);
+  ASSERT_NE(kernel_obj, nullptr);
+  kernel_obj->Run();
+
+  printf("==================output data=================\n");
+  for (int i = 0; i < 20; i++) {
+    std::cout << output_data[i] << " ,";
+  }
+  std::cout << std::endl;
+  std::string output_path = "./test_data/operators/fc_b_grad.f32";
+  auto res = CompareRelativeOutput(output_data, output_path);
+  EXPECT_EQ(res, 0);
 
   delete[] input_data;
   delete[] output_data;
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc
index 913d685725..bd0554c04d 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/bn_grad_fp32_test.cc
@@ -35,6 +35,10 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<
   size_t input_size = 0;
   auto input_data = reinterpret_cast<float *>(mindspore::lite::ReadFile(file_name.c_str(), &input_size));
   auto tensor = new lite::Tensor(TypeId::kNumberTypeFloat32, dim);
+  if (tensor == nullptr) {
+    MS_LOG(ERROR) << "new tensor failed";
+    return nullptr;
+  }
   tensor->set_data(input_data);
   EXPECT_EQ(input_size, tensor->Size());
   return tensor;
@@ -43,7 +47,9 @@ lite::Tensor *TestBNGradFp32::CreateInTensor(std::string file_name, std::vector<
 TEST_F(TestBNGradFp32, BNGradFp32) {
   // prepare stage
   auto bn_param = static_cast<BNGradParameter *>(malloc(sizeof(BNGradParameter)));
-  bn_param->epsilon_ = 0.00001;
+  ASSERT_NE(bn_param, nullptr);
+
+  bn_param->epsilon_ = 1e-2;
   bn_param->momentum_ = 0.1;
   const int batch = 2;
   const int channels = 3;
@@ -51,10 +57,16 @@ TEST_F(TestBNGradFp32, BNGradFp32) {
   const int width = 5;
 
   auto dy_tensor = CreateInTensor("./test_data/bngrad/dy_2_4_5_3.bin", {batch, height, width, channels});
+  ASSERT_NE(dy_tensor, nullptr);
   auto x_tensor = CreateInTensor("./test_data/bngrad/input_x_2_4_5_3.bin", {batch, height, width, channels});
+  ASSERT_NE(x_tensor, nullptr);
   auto scale_tensor = CreateInTensor("./test_data/bngrad/scale_3.bin", {1, 1, 1, channels});
+  ASSERT_NE(scale_tensor, nullptr);
   auto mean_tensor = CreateInTensor("./test_data/bngrad/save_mean_3.bin", {1, 1, 1, channels});
+  ASSERT_NE(mean_tensor, nullptr);
   auto var_tensor = CreateInTensor("././test_data/bngrad/save_var_3.bin", {1, 1, 1, channels});
+  ASSERT_NE(var_tensor, nullptr);
+
+  // prepare output tensors
   lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, {batch, height, width, channels});
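+  // BNGrad consumes five inputs (dy, x, scale, saved mean, saved variance)
+  // and produces three gradients (dx, dscale, dbias); the tensors below hold
+  // the outputs.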
ASSERT_EQ(dx_tensor.MallocData(), 0); @@ -72,27 +84,18 @@ TEST_F(TestBNGradFp32, BNGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_BNGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bn_param), &ctx, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); - for (int i = 0; i < 3; i++) { - kernel_obj->Run(); - } - - int loop_count = 100; - auto time_start = mindspore::lite::GetTimeUs(); - for (int i = 0; i < loop_count; i++) { - kernel_obj->Run(); - } - auto time_end = mindspore::lite::GetTimeUs(); - auto cost = time_end - time_start; - auto time_avg = cost / loop_count; - std::cout << "single thread running time : " << time_avg << "us\n"; + kernel_obj->Run(); std::cout << "==========dx==========\n"; auto dx = reinterpret_cast(outputs[0]->MutableData()); for (int i = 0; i < 7; i++) std::cout << dx[i] << " "; std::cout << "\n"; auto res = CompareRelativeOutput(dx, "./test_data/bngrad/output_dx_2_4_5_3.bin"); + EXPECT_EQ(res, 0); std::cout << "\n=======dscale=======\n"; auto dscale = reinterpret_cast(outputs[1]->MutableData()); for (int i = 0; i < channels; i++) std::cout << dscale[i] << " "; @@ -104,7 +107,6 @@ TEST_F(TestBNGradFp32, BNGradFp32) { for (int i = 0; i < 3; i++) std::cout << dbias[i] << " "; std::cout << "\n"; res = CompareRelativeOutput(dbias, "./test_data/bngrad/output_dbias_3.bin"); - EXPECT_EQ(res, 0); for (auto v : inputs) { delete[] reinterpret_cast(v->MutableData()); v->set_data(nullptr); @@ -117,8 +119,10 @@ TEST_F(TestBNGradFp32, BNGradFp32) { TEST_F(TestBNGradFp32, BNTtrainFp32) { auto bn_param = static_cast(malloc(sizeof(BatchNormParameter))); - bn_param->epsilon_ = 0.00001; - bn_param->momentum_ = 0.; + ASSERT_NE(bn_param, nullptr); + + bn_param->epsilon_ = 1e-2; + bn_param->momentum_ = 0.1; const int batch = 2; const int channels = 3; const int height = 4; @@ -173,27 +177,34 @@ TEST_F(TestBNGradFp32, BNTtrainFp32) { ASSERT_EQ(lite::RET_OK, context.Init()); auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(bn_param), &context, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); float *save_mean = reinterpret_cast(save_mean_tensor.MutableData()); float *save_var = reinterpret_cast(save_var_tensor.MutableData()); - std::fill(save_mean, save_mean + channels, 0.f); - std::fill(save_var, save_var + channels, 0.f); + for (int i = 0; i < channels; i++) { + save_var[i] = 1.f; + save_mean[i] = 0.f; + } + float *curr_mean = reinterpret_cast(mean_tensor.MutableData()); + float *curr_var = reinterpret_cast(var_tensor.MutableData()); - kernel_obj->train(); + kernel_obj->Train(); + kernel_obj->SetTrainable(true); kernel_obj->Run(); std::cout << "================save_mean==============================\n"; - for (int i = 0; i < channels; i++) std::cout << save_mean[i] << " "; + for (int i = 0; i < channels; i++) std::cout << curr_mean[i] << " "; std::cout << "\n"; std::cout << "===============save_var==============================\n"; - for (int i = 0; i < channels; i++) std::cout << save_var[i] << " "; + for (int i = 0; i < channels; i++) std::cout << curr_var[i] << " "; std::cout << "\n"; delete[] reinterpret_cast(x_tensor->MutableData()); - auto res = 
CompareRelativeOutput(save_mean, "./test_data/bngrad/running_mean_3.bin"); + auto res = CompareRelativeOutput(curr_mean, "./test_data/bngrad/running_mean_3.bin"); EXPECT_EQ(res, 0); - res = CompareRelativeOutput(save_var, "./test_data/bngrad/running_var_3.bin"); + res = CompareRelativeOutput(curr_var, "./test_data/bngrad/running_var_3.bin"); EXPECT_EQ(res, 0); x_tensor->set_data(nullptr); diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc index 080cb1d17c..0625adb70f 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/convolution_grad_fp32_tests.cc @@ -77,11 +77,13 @@ void InitConvParamGroup3Dilation2FP32(ConvParameter *conv_param) { TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup1FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup1FP32(conv_param); size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({1, 28, 28, 32}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); @@ -95,11 +97,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { size_t input_size; std::string input_path = "./test_data/conv/convfp32_x_1_28_28_3.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({1, 28, 28, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({32, 3, 3, 3}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -112,7 +116,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { @@ -149,8 +155,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32FilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup1FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup1FP32(conv_param); size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_1_28_28_32.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); @@ -168,6 +175,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { size_t output_data_size = conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; auto dx_data = new float[output_data_size]; + ASSERT_NE(dx_data, nullptr); std::vector dim_dx({1, 28, 28, 3}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); dx_tensor.set_data(dx_data); @@ -185,7 +193,9 @@ TEST_F(TestConvolutionGradFp32, 
ConvFp32InputGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -222,8 +232,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32InputGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup3FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup3FP32(conv_param); size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); @@ -245,6 +256,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({18, 3, 3, 1}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -257,7 +269,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { @@ -293,8 +307,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupFilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup3FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup3FP32(conv_param); size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_g3_1_28_28_18.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); @@ -312,6 +327,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { size_t output_data_size = conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; auto dx_data = new float[output_data_size]; + ASSERT_NE(dx_data, nullptr); std::vector dim_dx({1, 28, 28, 3}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); dx_tensor.set_data(dx_data); @@ -329,7 +345,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop for (int i = 0; i < 3; i++) { @@ -365,9 +383,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupInputGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); InitConvParamGroup3Dilation2FP32(conv_param); - size_t dy_size; 
std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); @@ -389,6 +407,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({18, 3, 3, 1}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -401,7 +420,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -437,8 +458,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationFilterGrad) { TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup3Dilation2FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup3Dilation2FP32(conv_param); size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_g3_d2_1_26_26_18.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); @@ -456,6 +478,7 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { size_t output_data_size = conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; auto dx_data = new float[output_data_size]; + ASSERT_NE(dx_data, nullptr); std::vector dim_dx({1, 28, 28, 3}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); dx_tensor.set_data(dx_data); @@ -473,7 +496,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); int loop_count = 100; @@ -504,8 +529,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32GroupDilationInputGrad) { TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); - InitConvParamGroup3Dilation2FP32(conv_param); + ASSERT_NE(conv_param, nullptr); + InitConvParamGroup3Dilation2FP32(conv_param); size_t x_size; std::string x_path = "./test_data/conv/convfp32_x_g3_d2_1_28_28_3.bin"; auto x_data = reinterpret_cast(mindspore::lite::ReadFile(x_path.c_str(), &x_size)); @@ -523,6 +549,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { size_t output_data_size = conv_param->output_batch_ * conv_param->output_h_ * conv_param->output_w_ * conv_param->output_channel_; auto y_data = new float[output_data_size]; + ASSERT_NE(y_data, nullptr); std::vector dim_y({1, 26, 26, 18}); lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); y_tensor.set_data(y_data); @@ -540,11 +567,12 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { auto *kernel = new mindspore::kernel::ConvolutionTrainCPUKernel(reinterpret_cast(conv_param), 
inputs, outputs, &context, 0); + ASSERT_NE(kernel, nullptr); kernel->Init(); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); - kernel->train(); - EXPECT_EQ(kernel->is_train(), 1); + kernel->Train(); + EXPECT_EQ(kernel->IsTrain(), 1); // warm up loop for (int i = 0; i < 3; i++) { @@ -580,6 +608,8 @@ TEST_F(TestConvolutionGradFp32, ConvGroupDilation) { TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -624,11 +654,13 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { size_t input_size; std::string input_path = "./test_data/conv/convfp32_input0_d2_g2_s2_2_4_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 4}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({12, 3, 3, 2}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -641,7 +673,9 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -679,6 +713,8 @@ TEST_F(TestConvolutionGradFp32, ConvFp32Dilation2Group2Stride2FilterGrad) { TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -710,6 +746,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { size_t dy_size; std::string dy_path = "./test_data/conv/convfp32_dy_d2_g2_s2_2_12_15_15.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 15, 15, 12}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); @@ -717,6 +754,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { size_t w_size; std::string w_path = "./test_data/conv/convfp32_w_d2_g2_s2_12_2_3_3.bin"; auto w_data = reinterpret_cast(mindspore::lite::ReadFile(w_path.c_str(), &w_size)); + ASSERT_NE(w_data, nullptr); std::vector dim_w({12, 3, 3, 2}); lite::Tensor w_tensor(TypeId::kNumberTypeFloat32, dim_w); w_tensor.set_data(w_data); @@ -724,6 +762,7 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { size_t output_data_size = conv_param->input_batch_ * conv_param->input_h_ * conv_param->input_w_ * conv_param->input_channel_; auto dx_data = new float[output_data_size]; + ASSERT_NE(dx_data, nullptr); std::vector dim_dx({2, 32, 32, 4}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); dx_tensor.set_data(dx_data); @@ -741,7 +780,9 @@ TEST_F(TestConvolutionGradFp32, ConvGroup2Dilation2Stride2) { kernel::KernelKey desc = {kernel::kCPU, 
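// ----------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the rename train()/is_train() ->
// Train()/IsTrain() above tracks the reworked TrainSession API, in which
// Train() and Eval() return a status code instead of void. A hedged usage
// sketch assembled only from calls that appear elsewhere in this patch:
//
//   auto *session = mindspore::session::TrainSession::CreateSession(context);
//   ASSERT_NE(session, nullptr);
//   ASSERT_EQ(session->CompileTrainGraph(model), mindspore::lite::RET_OK);
//   ASSERT_EQ(session->Train(), mindspore::lite::RET_OK);  // formerly void
//   // ... run training steps ...
//   ASSERT_EQ(session->Eval(), mindspore::lite::RET_OK);   // formerly void
// ----------------------------------------------------------------------------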
TypeId::kNumberTypeFloat32, schema::PrimitiveType_Conv2DGradInput}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc index 571ffa81f8..ecdee356e7 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/deconvolution_grad_fp32_tests.cc @@ -32,6 +32,8 @@ class TestDeConvolutionGradFp32 : public mindspore::CommonTest { TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -63,24 +65,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { size_t dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_2_9_63_63.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 63, 63, 9}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_2_3_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({3, 3, 3, 9}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -93,7 +95,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -101,6 +105,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { kernel->Run(); } + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { @@ -131,6 +138,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32FilterGrad) { TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -162,24 +171,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { size_t 
dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_2_9_65_65.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 65, 65, 9}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_2_3_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({9, 3, 3, 3}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -192,7 +201,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -200,6 +211,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { kernel->Run(); } + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { @@ -230,6 +244,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2FilterGrad) { TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -261,6 +277,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { size_t dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_2_9_65_65.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 65, 65, 9}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); @@ -274,11 +291,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_2_3_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({3, 3, 3, 3}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -291,7 +310,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, 
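// ----------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the recurring relocation of the
// "runtime part" block in these deconv hunks moves the timing bookkeeping
// below the warm-up loop, so cold-start iterations are no longer measured.
// A self-contained sketch of the resulting measurement shape; the clock is a
// parameter here, while in the tests it is mindspore::lite::GetTimeUs:
#include <cstdint>
template <typename RunFn, typename ClockFn>
uint64_t AverageRunCostUs(RunFn run, ClockFn now_us, int warmup = 3, int loop_count = 100) {
  for (int i = 0; i < warmup; i++) run();      // warm up, excluded from timing
  uint64_t time_start = now_us();
  for (int i = 0; i < loop_count; i++) run();  // measured region
  return (now_us() - time_start) / loop_count;
}
// ----------------------------------------------------------------------------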
schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -329,6 +350,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3FilterGrad) { TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -360,24 +383,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { size_t dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g3_s1_2_9_34_34.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 34, 34, 9}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g3_s1_2_3_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({3, 3, 3, 3}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -390,7 +413,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -398,6 +423,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { kernel->Run(); } + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { @@ -428,6 +456,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group3Stride1FilterGrad) { TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -459,24 +489,24 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { size_t dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g2_s2_2_12_65_65.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 65, 65, 12}); lite::Tensor 
dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); - // runtime part - printf("Calculating runtime cost...\n"); - uint64_t time_avg = 0; size_t output_data_size = conv_param->output_channel_ * conv_param->kernel_h_ * conv_param->kernel_w_ * conv_param->input_channel_; size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g2_s2_2_4_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 4}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({6, 3, 3, 4}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -489,7 +519,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop @@ -497,6 +529,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { kernel->Run(); } + // runtime part + printf("Calculating runtime cost...\n"); + uint64_t time_avg = 0; int loop_count = 100; auto time_start = mindspore::lite::GetTimeUs(); for (int i = 0; i < loop_count; i++) { @@ -527,6 +562,8 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group2Stride2FilterGrad) { TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { // prepare stage auto conv_param = static_cast(malloc(sizeof(ConvParameter))); + ASSERT_NE(conv_param, nullptr); + conv_param->input_batch_ = 2; conv_param->input_h_ = 32; conv_param->input_w_ = 32; @@ -558,6 +595,7 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { size_t dy_size; std::string dy_path = "./test_data/deconv/deconvfp32_dy_d2_g12_s2_2_12_65_65.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &dy_size)); + ASSERT_NE(dy_data, nullptr); std::vector dim_dy({2, 65, 65, 12}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(dy_data); @@ -571,11 +609,13 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { size_t input_size; std::string input_path = "./test_data/deconv/deconvfp32_input0_d2_g12_s2_2_12_32_32.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_x({2, 32, 32, 12}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input_data); auto dw_data = new float[output_data_size]; + ASSERT_NE(dw_data, nullptr); std::vector dim_dw({1, 3, 3, 12}); lite::Tensor dw_tensor(TypeId::kNumberTypeFloat32, dim_dw); dw_tensor.set_data(dw_data); @@ -588,7 +628,9 @@ TEST_F(TestDeConvolutionGradFp32, DeConvFp32Dilation2Group12Stride2FilterGrad) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_DeConv2DGradFilter}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel = creator(inputs, outputs, 
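// ----------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the registry idiom being guarded
// throughout these hunks. Both steps can legitimately fail: GetCreator()
// returns null when no kernel is registered for the (arch, dtype, op) key,
// and the creator itself may return null on bad parameters, hence the two
// separate assertions. Names follow the tests; the OpParameter cast is an
// assumption about the creator signature:
//
//   kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32,
//                             schema::PrimitiveType_DeConv2DGradFilter};
//   auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
//   ASSERT_NE(creator, nullptr);   // is the op registered for this key?
//   auto kernel = creator(inputs, outputs,
//                         reinterpret_cast<OpParameter *>(conv_param),
//                         &context, desc, nullptr);
//   ASSERT_NE(kernel, nullptr);    // construction can still fail
// ----------------------------------------------------------------------------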
reinterpret_cast(conv_param), &context, desc, nullptr); + ASSERT_NE(kernel, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel->GetWorkspaceSize()); // warm up loop diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc index 0e1275cbdf..6f1aa1f51e 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/network_test.cc @@ -90,6 +90,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_Activation; auto primitive = new schema::ActivationT; + ASSERT_NE(primitive, nullptr); primitive->type = schema::ActivationType_RELU; node->primitive->value.value = primitive; node->name = "ReLU"; @@ -102,6 +103,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_MatMul; auto primitive = new schema::MatMulT; + ASSERT_NE(primitive, nullptr); primitive->transposeA = false; primitive->transposeB = true; node->primitive->value.value = primitive; @@ -115,6 +117,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_BiasAdd; auto primitive = new schema::BiasAddT; + ASSERT_NE(primitive, nullptr); primitive->axis.push_back(0); node->primitive->value.value = primitive; node->name = "BiasAdd"; @@ -127,6 +130,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_SoftmaxCrossEntropy; auto primitive = new schema::SoftmaxCrossEntropyT; + ASSERT_NE(primitive, nullptr); primitive->axis.push_back(0); node->primitive->value.value = primitive; node->name = "SoftmaxCrossEntropy"; @@ -139,6 +143,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_BiasGrad; auto primitive = new schema::BiasGradT; + ASSERT_NE(primitive, nullptr); primitive->axis.push_back(0); node->primitive->value.value = primitive; node->name = "BiasGrad"; @@ -151,6 +156,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_MatMul; auto primitive = new schema::MatMulT; + ASSERT_NE(primitive, nullptr); primitive->transposeA = true; primitive->transposeB = false; node->primitive->value.value = primitive; @@ -164,6 +170,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_ApplyMomentum; auto primitive = new schema::ApplyMomentumT; + ASSERT_NE(primitive, nullptr); node->primitive->value.value = primitive; node->name = "Momentum"; meta_graph->nodes.emplace_back(std::move(node)); @@ -175,6 +182,7 @@ TEST_F(NetworkTest, tuning_layer) { node->primitive = std::make_unique(); node->primitive->value.type = schema::PrimitiveType_ApplyMomentum; auto primitive = new schema::ApplyMomentumT; + ASSERT_NE(primitive, nullptr); node->primitive->value.value = primitive; node->name = "Momentum"; meta_graph->nodes.emplace_back(std::move(node)); @@ -450,9 +458,6 @@ TEST_F(NetworkTest, tuning_layer) { std::cout << std::endl; error = RelativeOutputError(outData, output_path); EXPECT_LT(error, 2e-3); - - delete session; - MS_LOG(INFO) << "TuningLayer passed"; } int32_t fileIterator(mindspore::session::TrainSession *session, const std::string &path, @@ 
-516,6 +521,7 @@ TEST_F(NetworkTest, efficient_net) { auto model = lite::TrainModel::Import(buf, net_size); delete[] buf; auto context = new lite::Context; + ASSERT_NE(context, nullptr); context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; @@ -533,48 +539,6 @@ TEST_F(NetworkTest, efficient_net) { ASSERT_EQ(res, 0); } -TEST_F(NetworkTest, lenetnet) { - char *buf = nullptr; - size_t net_size = 0; - std::string net = "./test_data/nets/lenet_train.ms"; - ReadFile(net.c_str(), &net_size, &buf); - auto model = lite::TrainModel::Import(buf, net_size); - delete[] buf; - auto context = new lite::Context; - context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; - context->thread_num_ = 1; - - // check registration - mindspore::lite::KernelRegistry *reg = mindspore::lite::KernelRegistry::GetInstance(); - mindspore::kernel::KernelKey desc1 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, - mindspore::schema::PrimitiveType_Conv2D}; - mindspore::kernel::KernelKey desc2 = {mindspore::kernel::KERNEL_ARCH::kCPU, kNumberTypeFloat32, - mindspore::schema::PrimitiveType_DepthwiseConv2D}; - auto regb1 = reg->GetCreator(desc1); - auto regb2 = reg->GetCreator(desc2); - ASSERT_EQ(regb1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); - - auto session = session::TrainSession::CreateSession(context); - ASSERT_NE(session, nullptr); - auto ret = session->CompileTrainGraph(model); - ASSERT_EQ(lite::RET_OK, ret); - - auto rega1 = reg->GetCreator(desc1); - auto rega2 = reg->GetCreator(desc2); - ASSERT_EQ(regb1, rega1); - ASSERT_EQ(regb2, rega2); - ASSERT_EQ(rega1 == mindspore::kernel::CpuConvTrainFp32KernelCreator, false); - // end of check registration - - session->Eval(); - std::string in = "./test_data/nets/x_lenet.bin"; - std::string out = "./test_data/nets/y_lenet.bin"; - auto res = runNet(session, in, out, "24"); - delete session; - delete context; - ASSERT_EQ(res, 0); -} - TEST_F(NetworkTest, retina_net) { char *buf = nullptr; size_t net_size = 0; @@ -585,6 +549,7 @@ TEST_F(NetworkTest, retina_net) { auto model = lite::Model::Import(buf, net_size); delete[] buf; auto context = new lite::Context; + ASSERT_NE(context, nullptr); context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; @@ -592,7 +557,7 @@ TEST_F(NetworkTest, retina_net) { auto session = session::LiteSession::CreateSession(context); ASSERT_NE(session, nullptr); auto ret = session->CompileGraph(model); - ASSERT_EQ(lite::RET_OK, ret); + EXPECT_EQ(lite::RET_OK, ret); // session->Eval(); std::string in = "./test_data/nets/test1.hwc_normalized_f32"; @@ -619,8 +584,9 @@ TEST_F(NetworkTest, retina_net) { final_res |= res; } - ASSERT_EQ(final_res, 0); + EXPECT_EQ(final_res, 0); + delete model; delete session; delete context; } @@ -635,6 +601,7 @@ TEST_F(NetworkTest, mobileface_net) { auto model = lite::Model::Import(buf, net_size); delete[] buf; auto context = new lite::Context; + ASSERT_NE(context, nullptr); context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = lite::NO_BIND; context->thread_num_ = 1; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc index ae7d061f47..212c5d0c81 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc +++ 
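// ----------------------------------------------------------------------------
// Illustrative aside, not part of the patch: in the retina_net test above,
// ASSERT_EQ is relaxed to EXPECT_EQ around CompileGraph and the final result
// check, and `delete model` is added. The two changes cooperate: a failing
// ASSERT_* returns from the test function immediately, which would skip the
// trailing deletes and leak, while EXPECT_* records the failure and still
// lets the cleanup run. Sketch of the teardown-safe shape:
//
//   auto ret = session->CompileGraph(model);
//   EXPECT_EQ(lite::RET_OK, ret);   // non-fatal: cleanup below still runs
//   // ... run the net and compare outputs ...
//   delete model;
//   delete session;
//   delete context;
// ----------------------------------------------------------------------------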
b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/pooling_grad_fp32_tests.cc @@ -60,6 +60,8 @@ void InitPoolingParamFP32(PoolingParameter *pooling_param) { TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { // prepare stage auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(pooling_param, nullptr); + InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->pool_mode_ = PoolMode_AvgPool; @@ -73,8 +75,10 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { size_t input_size; std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { AvgPoolingGrad(input_data, output_data, pooling_param, 1); @@ -108,6 +112,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolingGradFp32) { TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { // prepare stage auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(pooling_param, nullptr); + InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->pool_mode_ = PoolMode_AvgPool; @@ -121,12 +127,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { size_t input_size; std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_1_28_28_3.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_dy({1, 28, 28, 3}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(input_data); std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_1_28_28_3.bin"; auto input1_data = reinterpret_cast(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); + ASSERT_NE(input1_data, nullptr); std::vector dim_x({1, 28, 28, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input1_data); @@ -134,6 +142,7 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { std::vector inputs = {&dy_tensor, &x_tensor}; auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); std::vector dim_dx({1, 28, 28, 3}); lite::Tensor dx_tensor(TypeId::kNumberTypeFloat32, dim_dx); dx_tensor.set_data(output_data); @@ -145,7 +154,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), &context, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); @@ -172,8 +183,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingKernelGradFp32) { TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { // prepare stage auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); - InitPoolingParamFP32(pooling_param); + ASSERT_NE(pooling_param, nullptr); + InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->input_batch_ = 3; pooling_param->output_batch_ = 3; @@ -185,12 +197,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { size_t input_size; std::string input_path = "./test_data/pooling/avgpoolgradfp32_1_dy_3_28_28_3.bin"; auto input_data = 
reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_dy({3, 28, 28, 3}); lite::Tensor dy_tensor(TypeId::kNumberTypeFloat32, dim_dy); dy_tensor.set_data(input_data); std::string input1_path = "./test_data/pooling/avgpoolgradfp32_1_x_3_28_28_3.bin"; auto input1_data = reinterpret_cast(mindspore::lite::ReadFile(input1_path.c_str(), &input_size)); + ASSERT_NE(input1_data, nullptr); std::vector dim_x({3, 28, 28, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(input1_data); @@ -209,7 +223,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolingBatchGradFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(pooling_param), &context, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); kernel_obj->Run(); @@ -236,6 +252,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward auto pool = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(pool, nullptr); + InitPoolingParamFP32(pool); pool->output_channel_ = 3; pool->pool_mode_ = PoolMode_AvgPool; @@ -250,12 +268,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); + ASSERT_NE(x_data, nullptr); std::vector dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(x_data); auto yt_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size)); + ASSERT_NE(yt_data, nullptr); std::vector dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.set_data(yt_data); @@ -271,7 +291,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride2Fp32) { kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); + ASSERT_NE(pool_creator, nullptr); auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), &context, pool_desc, nullptr); + ASSERT_NE(kernel, nullptr); kernel->Init(); @@ -295,6 +317,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward auto pool = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(pool, nullptr); + InitPoolingParamFP32(pool); pool->output_channel_ = 3; pool->pool_mode_ = PoolMode_AvgPool; @@ -309,12 +333,14 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); + ASSERT_NE(x_data, nullptr); std::vector dim_x({pool->output_batch_, pool->input_h_, pool->input_w_, pool->input_channel_}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(x_data); auto yt_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/avgpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size)); + ASSERT_NE(yt_data, 
nullptr); std::vector dim_y({pool->output_batch_, pool->output_h_, pool->output_w_, pool->output_channel_}); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.set_data(yt_data); @@ -332,7 +358,9 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { kernel::KernelKey pool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto pool_creator = lite::KernelRegistry::GetInstance()->GetCreator(pool_desc); + ASSERT_NE(pool_creator, nullptr); auto kernel = pool_creator(inputs, outputs, reinterpret_cast(pool), &context, pool_desc, nullptr); + ASSERT_NE(kernel, nullptr); kernel->Init(); @@ -356,6 +384,8 @@ TEST_F(TestPoolingGradFp32, AvgPoolGradStride3Fp32) { TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { // prepare stage auto pooling_param = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(pooling_param, nullptr); + InitPoolingParamFP32(pooling_param); pooling_param->output_channel_ = 3; pooling_param->pool_mode_ = PoolMode_MaxPool; @@ -368,14 +398,18 @@ TEST_F(TestPoolingGradFp32, MaxPoolingGradFp32) { size_t input_size; std::string i_path = "./test_data/pooling/maxpoolgradfp32_1_x_1_28_28_3.bin"; auto in_data = reinterpret_cast(mindspore::lite::ReadFile(i_path.c_str(), &input_size)); + ASSERT_NE(in_data, nullptr); std::string dy_path = "./test_data/pooling/maxpoolgradfp32_1_dy_1_28_28_3.bin"; auto dy_data = reinterpret_cast(mindspore::lite::ReadFile(dy_path.c_str(), &input_size)); + ASSERT_NE(dy_data, nullptr); std::string dx_path = "./test_data/pooling/maxpoolgradfp32_1_dx_1_28_28_3.bin"; auto dx_data = reinterpret_cast(mindspore::lite::ReadFile(dx_path.c_str(), &input_size)); + ASSERT_NE(dx_data, nullptr); auto output_data = new float[output_data_size]; + ASSERT_NE(output_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { MaxPoolingGrad(in_data, dx_data, dy_data, output_data, pooling_param, 1); @@ -412,6 +446,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(maxpool, nullptr); + InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->pool_mode_ = PoolMode_MaxPool; @@ -422,18 +458,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_x_3_28_28_3.bin", &input_size)); + ASSERT_NE(x_data, nullptr); std::vector dim_x({3, 28, 28, 3}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(x_data); auto y_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dx_3_28_28_3.bin", &input_size)); + ASSERT_NE(y_data, nullptr); std::vector dim_y({3, 28, 28, 3}); lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); y_tensor.set_data(y_data); auto yt_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_1_dy_3_28_28_3.bin", &input_size)); + ASSERT_NE(yt_data, nullptr); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.set_data(yt_data); @@ -449,8 +488,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradBatchFp32) { kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); + ASSERT_NE(maxpool_creator, nullptr); auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, 
reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); + ASSERT_NE(kernel, nullptr); kernel->Init(); @@ -477,6 +518,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(maxpool, nullptr); + InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->input_channel_ = 3; @@ -492,18 +535,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_x_3_28_28_3.bin", &input_size)); + ASSERT_NE(x_data, nullptr); std::vector dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(x_data); auto y_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dx_3_28_28_3.bin", &input_size)); + ASSERT_NE(y_data, nullptr); std::vector dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_}); lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); y_tensor.set_data(y_data); auto yt_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s2_dy_3_28_28_3.bin", &input_size)); + ASSERT_NE(yt_data, nullptr); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.set_data(yt_data); @@ -520,8 +566,10 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride2Fp32) { kernel::KernelKey maxpool_desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); + ASSERT_NE(maxpool_creator, nullptr); auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); + ASSERT_NE(kernel, nullptr); kernel->Init(); @@ -548,6 +596,8 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { // prepare stage // input size will be equal to the original size of x, output size will be the output size as in forward auto maxpool = static_cast(malloc(sizeof(PoolingParameter))); + ASSERT_NE(maxpool, nullptr); + InitPoolingParamFP32(maxpool); maxpool->output_channel_ = 3; maxpool->input_channel_ = 3; @@ -563,18 +613,21 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { auto x_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_x_3_28_28_3.bin", &input_size)); + ASSERT_NE(x_data, nullptr); std::vector dim_x({maxpool->output_batch_, maxpool->input_h_, maxpool->input_w_, maxpool->input_channel_}); lite::Tensor x_tensor(TypeId::kNumberTypeFloat32, dim_x); x_tensor.set_data(x_data); auto y_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dx_3_28_28_3.bin", &input_size)); + ASSERT_NE(y_data, nullptr); std::vector dim_y({maxpool->output_batch_, maxpool->output_h_, maxpool->output_w_, maxpool->output_channel_}); lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); y_tensor.set_data(y_data); auto yt_data = reinterpret_cast( mindspore::lite::ReadFile("./test_data/pooling/maxpoolgradfp32_s3_dy_3_28_28_3.bin", &input_size)); + ASSERT_NE(yt_data, nullptr); lite::Tensor yt_tensor(TypeId::kNumberTypeFloat32, dim_y); yt_tensor.set_data(yt_data); @@ -591,11 +644,12 @@ TEST_F(TestPoolingGradFp32, MaxPoolGradStride3Fp32) { kernel::KernelKey maxpool_desc = {kernel::kCPU, 
TypeId::kNumberTypeFloat32, schema::PrimitiveType_PoolingGrad}; auto maxpool_creator = lite::KernelRegistry::GetInstance()->GetCreator(maxpool_desc); + ASSERT_NE(maxpool_creator, nullptr); auto kernel = maxpool_creator(maxpool_inputs, maxpool_outputs, reinterpret_cast(maxpool), &context, maxpool_desc, nullptr); + ASSERT_NE(kernel, nullptr); kernel->Init(); - kernel->Run(); std::string output_path = "./test_data/pooling/maxpoolgradfp32_s3_xgrad_3_28_28_3.bin"; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc index 9988320431..91de1005d4 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_crossentropy_fp32_tests.cc @@ -31,17 +31,21 @@ class TestSoftmaxCrossEntropyFp32 : public mindspore::CommonTest { TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { // prepare stage auto sce_param = reinterpret_cast(malloc(sizeof(SoftmaxCrossEntropyParameter))); + ASSERT_NE(sce_param, nullptr); size_t input_size; std::string input_path = "./test_data/operators/sce_fp32_1_y_6_4.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::vector dim_y({6, 4}); lite::Tensor y_tensor(TypeId::kNumberTypeFloat32, dim_y); y_tensor.set_data(input_data); std::string label_path = "./test_data/operators/sce_fp32_1_l_6.bin"; auto ll_labels = reinterpret_cast(mindspore::lite::ReadFile(label_path.c_str(), &input_size)); + ASSERT_NE(ll_labels, nullptr); auto labels = new float[6 * 4]; + ASSERT_NE(labels, nullptr); std::fill(labels, labels + 6 * 4, 0.f); for (int i = 0; i < 6; i++) labels[i * 4 + ll_labels[i]] = 1.0; @@ -52,10 +56,12 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { std::vector inputs = {&y_tensor, &l_tensor}; auto loss = new float[1]; + ASSERT_NE(loss, nullptr); std::vector dim_dw({1}); lite::Tensor loss_tensor(TypeId::kNumberTypeFloat32, dim_dw); loss_tensor.set_data(loss); auto grad = new float[24]; + ASSERT_NE(grad, nullptr); lite::Tensor grad_tensor(TypeId::kNumberTypeFloat32, dim_y); grad_tensor.set_data(grad); std::vector outputs = {&loss_tensor, &grad_tensor}; @@ -66,7 +72,9 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { kernel::KernelKey desc = {kernel::kCPU, TypeId::kNumberTypeFloat32, schema::PrimitiveType_SoftmaxCrossEntropy}; auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc); + ASSERT_NE(creator, nullptr); auto kernel_obj = creator(inputs, outputs, reinterpret_cast(sce_param), &context, desc, nullptr); + ASSERT_NE(kernel_obj, nullptr); mindspore::kernel::LiteKernel::AllocWorkspace(kernel_obj->GetWorkspaceSize()); kernel_obj->Run(); @@ -78,16 +86,20 @@ TEST_F(TestSoftmaxCrossEntropyFp32, SoftmaxCrossEntropyFp32) { std::string output_path = "./test_data/operators/sce_fp32_1_loss_1.bin"; CompareOutput(loss, 1, output_path); - ((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->train(); + ((mindspore::kernel::SparseSoftmaxCrossEntropyWithLogitsCPUKernel *)kernel_obj)->Train(); kernel_obj->Run(); - + // normalize the result by the batch size + for (int i = 0; i < 24; i++) { + grad[i] /= 6; + } printf("==================output data=================\n"); for (int i = 0; i < 12; i++) { std::cout << grad[i] << " ,"; } std::cout << std::endl; std::string grad_path = 
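// ----------------------------------------------------------------------------
// Illustrative aside, not part of the patch: the loop added above rescales the
// gradient before comparison. The test batches 6 samples of 4 classes (24
// gradient entries); after switching the kernel to Train() the raw output is
// apparently a per-batch gradient sum, so dividing each entry by the batch
// size converts it to the batch-mean convention of the reference file.
// Generalized sketch (batch and classes are illustrative parameters):
inline void NormalizeGradByBatch(float *grad, int batch, int classes) {
  for (int i = 0; i < batch * classes; i++) {
    grad[i] /= static_cast<float>(batch);  // gradient sum -> gradient mean
  }
}
// ----------------------------------------------------------------------------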
"./test_data/operators/sce_fp32_1_dy_6_4.bin"; - CompareOutput(grad, 24, grad_path); + auto res = CompareRelativeOutput(grad, grad_path); + EXPECT_EQ(res, 0); delete[] ll_labels; delete[] labels; diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc index 6c61cd1e7f..3300a6974a 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32_grad/softmax_grad_fp32_tests.cc @@ -55,6 +55,7 @@ void InitSoftMaxParam(SoftmaxParameter *softmax_param, int axis, int n, int c, i TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { auto softmax_param = new SoftmaxParameter(); + ASSERT_NE(softmax_param, nullptr); // set parameters InitSoftMaxParam(softmax_param, 0); @@ -64,21 +65,23 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { inner_size *= softmax_param->input_shape_[i]; } float *sum_data = new (std::nothrow) float[inner_size]; + ASSERT_NE(sum_data, nullptr); float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + ASSERT_NE(sum_mul, nullptr); std::vector shape = {1, 9, 11, 12}; size_t input_size; std::string input_path = "./test_data/softmax/softmaxgrad_yinput.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - + ASSERT_NE(input_data, nullptr); std::string yt_path = "./test_data/softmax/softmaxgrad_yt_input.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - + ASSERT_NE(yt_data, nullptr); // runtime part printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; auto out_data = new float[softmax_param->element_size_]; - + ASSERT_NE(out_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { SoftmaxGrad(input_data, yt_data, out_data, sum_data, sum_mul, softmax_param); @@ -112,6 +115,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis0) { TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { auto softmax_param = new SoftmaxParameter(); + ASSERT_NE(softmax_param, nullptr); // set parameters InitSoftMaxParam(softmax_param, 1); @@ -121,21 +125,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { inner_size *= softmax_param->input_shape_[i]; } float *sum_data = new (std::nothrow) float[inner_size]; + ASSERT_NE(sum_data, nullptr); float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + ASSERT_NE(sum_mul, nullptr); std::vector shape = {1, 9, 11, 12}; size_t input_size; std::string input_path = "./test_data/softmax/softmaxgrad_1_yinput.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::string yt_path = "./test_data/softmax/softmaxgrad_1_yt_input.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); // runtime part printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; auto out_data = new float[softmax_param->element_size_]; + ASSERT_NE(out_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { @@ -171,6 +180,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis1) { TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { auto softmax_param = new SoftmaxParameter(); + ASSERT_NE(softmax_param, nullptr); // set parameters InitSoftMaxParam(softmax_param, 2); @@ -180,21 +190,26 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { 
inner_size *= softmax_param->input_shape_[i]; } float *sum_data = new (std::nothrow) float[inner_size]; + ASSERT_NE(sum_data, nullptr); float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + ASSERT_NE(sum_mul, nullptr); std::vector shape = {1, 9, 11, 12}; size_t input_size; std::string input_path = "./test_data/softmax/softmaxgrad_2_yinput.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::string yt_path = "./test_data/softmax/softmaxgrad_2_yt_input.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); // runtime part printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; auto out_data = new float[softmax_param->element_size_]; + ASSERT_NE(out_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { @@ -230,6 +245,7 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis2) { TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { auto softmax_param = new SoftmaxParameter(); + ASSERT_NE(softmax_param, nullptr); // set parameters InitSoftMaxParam(softmax_param, 3); @@ -239,21 +255,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { inner_size *= softmax_param->input_shape_[i]; } float *sum_data = new (std::nothrow) float[inner_size]; + ASSERT_NE(sum_data, nullptr); float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + ASSERT_NE(sum_mul, nullptr); std::vector shape = {1, 9, 11, 12}; size_t input_size; std::string input_path = "./test_data/softmax/softmaxgrad_3_yinput.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); - + ASSERT_NE(input_data, nullptr); std::string yt_path = "./test_data/softmax/softmaxgrad_3_yt_input.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); + ASSERT_NE(yt_data, nullptr); // runtime part printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; auto out_data = new float[softmax_param->element_size_]; + ASSERT_NE(out_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { @@ -289,6 +309,8 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxis3) { TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { auto softmax_param = new SoftmaxParameter(); + ASSERT_NE(softmax_param, nullptr); + // set parameters InitSoftMaxParam(softmax_param, -1); @@ -298,21 +320,25 @@ TEST_F(TestSoftmaxGradFp32, SoftmaxGradAxisMinus1) { inner_size *= softmax_param->input_shape_[i]; } float *sum_data = new (std::nothrow) float[inner_size]; + ASSERT_NE(sum_data, nullptr); float *sum_mul = new (std::nothrow) float[inner_size * softmax_param->input_shape_[softmax_param->axis_]]; + ASSERT_NE(sum_mul, nullptr); std::vector shape = {1, 9, 11, 12}; size_t input_size; std::string input_path = "./test_data/softmax/softmaxgrad_-1_yinput.bin"; auto input_data = reinterpret_cast(mindspore::lite::ReadFile(input_path.c_str(), &input_size)); + ASSERT_NE(input_data, nullptr); std::string yt_path = "./test_data/softmax/softmaxgrad_-1_yt_input.bin"; auto yt_data = reinterpret_cast(mindspore::lite::ReadFile(yt_path.c_str(), &input_size)); - + ASSERT_NE(yt_data, nullptr); // runtime part printf("Calculating runtime cost...\n"); uint64_t time_avg = 0; auto out_data = new float[softmax_param->element_size_]; + ASSERT_NE(out_data, nullptr); // warm up loop for (int i = 0; i < 3; i++) { diff --git 
a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_out_50.bin new file mode 100644 index 0000000000..2e65e6d409 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_out_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_x_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_x_50.bin new file mode 100644 index 0000000000..c01986eae4 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_x_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_yt_50.bin new file mode 100644 index 0000000000..7502f7121b Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hsig_yt_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin index 6ff3dd84c9..71785be6c3 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_out_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin index b9341ce212..eaa2049f01 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_x_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin index b25a5c7787..6e0ffb821d 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/activationGrad/hswish_yt_50.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin index 2ccfc68d73..e02252dd6b 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/dy_2_4_5_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin index a194c59c0a..9754647732 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/input_x_2_4_5_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin index ac2915f01f..6279d91483 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dbias_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin index cf8cfa4b05..4d31c84f93 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dscale_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin index daf908d964..110652f810 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/output_dx_2_4_5_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin index 7cf94f5cd0..4cc4654b7e 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_mean_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin index c124f424e8..10a5cce277 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/running_var_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_mean_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_mean_3.bin index a3cd501da7..3351ce87e0 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_mean_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_mean_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_var_3.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_var_3.bin index aa2b0f6ede..74e5b4d0f5 100644 Binary files a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_var_3.bin and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/bngrad/save_var_3.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_b_grad.f32 b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_b_grad.f32 new file mode 100644 index 0000000000..098b242555 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_b_grad.f32 differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_yt.f32 b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_yt.f32 new file mode 100644 index 0000000000..58779f9fc0 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/fc_yt.f32 differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_grad_maximum.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_grad_maximum.bin new file mode 100644 index 0000000000..0f7481b380 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_grad_maximum.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_maximum.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_maximum.bin new file mode 100644 index 0000000000..b0ec219f21 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x1_maximum.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_grad_maximum.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_grad_maximum.bin new file mode 100644 index 0000000000..b2caa77eb7 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_grad_maximum.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_maximum.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_maximum.bin new file mode 100644 index 0000000000..8337142839 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/x2_maximum.bin differ
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/yt_maximum.bin b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/yt_maximum.bin new file mode 100644 index 0000000000..c064a92962 Binary files /dev/null and b/mindspore/lite/test/ut/src/runtime/kernel/arm/test_data/operators/yt_maximum.bin differ
diff --git a/mindspore/lite/tools/anf_exporter/anf_exporter.cc b/mindspore/lite/tools/anf_exporter/anf_exporter.cc index d28eabbc09..61799f64b5 100644 --- a/mindspore/lite/tools/anf_exporter/anf_exporter.cc +++ b/mindspore/lite/tools/anf_exporter/anf_exporter.cc @@ -297,6 +297,7 @@ int AnfExporter::ConvertInputCNode(const std::shared_ptr input_anode, s #endif } else { auto inputs = input_cnode->inputs(); + if (inputs.size() != 3) { MS_LOG(ERROR) << "TupleGetItem should have 3 inputs, got " << inputs.size(); return RET_ERROR; @@ -440,43 +441,8 @@ int AnfExporter::ConvertInputValueNode(std::shared_ptr input_anode, output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size()); meta_graphT->allTensors.emplace_back(std::move(paramTensor)); } else if (value->isa()) { -#ifndef SUPPORT_TRAIN MS_LOG(DEBUG) << "Value type is ValueSequence."; return RET_OK; -#else - auto valueAbstract = valueNode->abstract(); - auto abstractSequnce = utils::cast(valueAbstract); - if (abstractSequnce->isa()) { - auto abstractTuple = utils::cast(valueAbstract); - auto x_shape_data = abstractTuple->elements(); - 
-      std::vector<int> shape;
-      for (std::size_t i = 0; i < abstractTuple->size(); ++i) {
-        auto value_track = x_shape_data[i]->GetValueTrack();
-        if (value_track == nullptr) {
-          ReturnCode::GetSingleReturnCode()->UpdateReturnCode(RET_NULL_PTR);
-          return RET_NULL_PTR;
-        }
-        if (value_track->isa<Int32Imm>()) {
-          shape.push_back((GetValue<int>(value_track)));
-        } else {
-          MS_LOG(ERROR) << "Value type is ValueSequence is not integer, it is " << value_track->ToString() << ".";
-        }
-      }
-      if (shape.size()) {
-        auto typePtr = abstractTuple->elements()[0]->GetTypeTrack();  // abstractTuple->GetTypeTrack();
-        paramTensor->dataType = typePtr->type_id();
-        paramTensor->dims = {static_cast<int>(shape.size())};
-        paramTensor->nodeType = schema::NodeType_ValueNode;
-        paramTensor->data.resize(shape.size() * sizeof(int));
-        memcpy(paramTensor->data.data(), shape.data(), shape.size() * sizeof(int));
-        node_id_map_[valueNode->fullname_with_scope()] = meta_graphT->allTensors.size();
-        output_cnode->inputIndex.emplace_back(meta_graphT->allTensors.size());
-        meta_graphT->allTensors.emplace_back(std::move(paramTensor));
-      }
-    } else {
-      MS_LOG(ERROR) << "Value type is ValueSequence not supported - " << valueAbstract->type_name() << ".";
-    }
-#endif
   } else if (value->isa()) {
     auto valueAbstract = valueNode->abstract();
     auto abstractScalar = utils::cast<abstract::AbstractScalarPtr>(valueAbstract);
diff --git a/mindspore/lite/tools/common/node_util.cc b/mindspore/lite/tools/common/node_util.cc
index dcc082ef0f..273f164f43 100644
--- a/mindspore/lite/tools/common/node_util.cc
+++ b/mindspore/lite/tools/common/node_util.cc
@@ -52,15 +52,10 @@ static const std::vector<schema::PrimitiveType> nhwcOpList = {
   schema::PrimitiveType_SpaceToDepth,
   schema::PrimitiveType_DepthToSpace};
 
-static const std::vector<schema::PrimitiveType> nhwcOpDualInputList = {
-#ifdef SUPPORT_TRAIN
-    schema::PrimitiveType_Conv2DGradFilter, schema::PrimitiveType_BNGrad
-#endif
-};
-
 static const std::vector<schema::PrimitiveType> nhwcOpAllInputList = {
 #ifdef SUPPORT_TRAIN
-    schema::PrimitiveType_PoolingGrad, schema::PrimitiveType_ActivationGrad
+    schema::PrimitiveType_PoolingGrad, schema::PrimitiveType_ActivationGrad, schema::PrimitiveType_Conv2DGradFilter,
+    schema::PrimitiveType_BNGrad
 #endif
 };
 
@@ -156,8 +151,6 @@ std::vector<schema::PrimitiveType> Getfp32FullOpList() { return fp32FullOpList; }
 
 std::vector<schema::PrimitiveType> GetNhwcOpList() { return nhwcOpList; }
 
-std::vector<schema::PrimitiveType> GetNhwcDualInputOpList() { return nhwcOpDualInputList; }
-
 std::vector<schema::PrimitiveType> GetNhwcAllInputOpList() { return nhwcOpAllInputList; }
 
 std::vector<schema::PrimitiveType> GetUint8NhwcOpList() { return int8NeedNhwcOpList; }
diff --git a/mindspore/lite/tools/common/node_util.h b/mindspore/lite/tools/common/node_util.h
index 51d85ab1a5..d78ea34104 100644
--- a/mindspore/lite/tools/common/node_util.h
+++ b/mindspore/lite/tools/common/node_util.h
@@ -48,8 +48,6 @@ std::vector<schema::PrimitiveType> GetInsertOpList();
 
 std::vector<schema::PrimitiveType> GetNhwcOpList();
 
-std::vector<schema::PrimitiveType> GetNhwcDualInputOpList();
-
 std::vector<schema::PrimitiveType> GetNhwcAllInputOpList();
 
 std::vector<schema::PrimitiveType> Getfp32FullOpList();
diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc
index ab7a44d54a..cd3d1cc839 100644
--- a/mindspore/lite/tools/converter/converter_flags.cc
+++ b/mindspore/lite/tools/converter/converter_flags.cc
@@ -154,6 +154,25 @@ int Flags::Init(int argc, const char **argv) {
     std::cerr << "INPUT ILLEGAL: trainModel must be true|false ";
     return RET_INPUT_PARAM_INVALID;
   }
+
+  if (this->trainModel == true) {
+    if (this->fmk != FmkType_MS) {
+      std::cerr << "INPUT ILLEGAL: the train model converter supports only the MINDIR format";
+      return RET_INPUT_PARAM_INVALID;
+    }
+    if ((this->inputDataType != TypeId::kNumberTypeFloat32) && (this->inputDataType != TypeId::kTypeUnknown)) {
+      std::cerr << "INPUT ILLEGAL: the train model converter supports only FP32 input tensors";
+      return RET_INPUT_PARAM_INVALID;
+    }
+    if ((this->outputDataType != TypeId::kNumberTypeFloat32) && (this->outputDataType != TypeId::kTypeUnknown)) {
+      std::cerr << "INPUT ILLEGAL: the train model converter supports only FP32 output tensors";
+      return RET_INPUT_PARAM_INVALID;
+    }
+    if (this->quantType != QuantType_QUANT_NONE) {
+      std::cerr << "INPUT ILLEGAL: the train model converter does not support quantization";
+      return RET_INPUT_PARAM_INVALID;
+    }
+  }
   return RET_OK;
 }
 }  // namespace converter
diff --git a/mindspore/lite/tools/converter/graphdef_transform.cc b/mindspore/lite/tools/converter/graphdef_transform.cc
index 6f682e2b89..5a0597e04d 100644
--- a/mindspore/lite/tools/converter/graphdef_transform.cc
+++ b/mindspore/lite/tools/converter/graphdef_transform.cc
@@ -49,7 +49,9 @@ int GraphDefTransform::Transform(const converter::Flags &ctx) {
   {
     Optimizer unusedOpRemoveOptimizer;
     unusedOpRemoveOptimizer.AddPass(new UnusedNodeRemovePass());
-    unusedOpRemoveOptimizer.AddPass(new DropoutNodeRemovePass());
+    if (ctx.trainModel == false) {
+      unusedOpRemoveOptimizer.AddPass(new DropoutNodeRemovePass());
+    }
     unusedOpRemoveOptimizer.AddPass(new IsolatedNodeRemovePass());
     status = unusedOpRemoveOptimizer.Run(graphDefT);
     if (status != RET_OK && status != RET_NO_CHANGE) {
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/dropout_node_remove_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/dropout_node_remove_pass.cc
index 6b6ec109b7..bec2c0328a 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/dropout_node_remove_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/dropout_node_remove_pass.cc
@@ -54,8 +54,8 @@ STATUS IsolateDropoutNode(schema::MetaGraphT *graphT, size_t nodeIdx) {
   }
   auto postNodeIdxes = GetOutputNodeIdx(*graphT, nodeIdx, 1);
   if (postNodeIdxes.size() != 0) {
-    MS_LOG(ERROR) << "Unsupported Dropout: " << node->name.c_str() << " with mask output.";
-    return RET_ERROR;
+    MS_LOG(WARNING) << "Unsupported Dropout: " << node->name.c_str() << " with mask output.";
+    return RET_OK;
   }
   auto inDataTensorIdx = inputTensorIdxes.front();
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc
index aaf9324392..6699974d5b 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/format_trans_pass.cc
@@ -148,6 +148,7 @@ STATUS FormatTransPass::DoNodeInoutFormatTrans(schema::MetaGraphT *graph) {
 #ifdef SUPPORT_TRAIN
     if (IsContain(GetNhwcAllInputOpList(), GetCNodeTType(**iter))) {
       int idx_num = node->inputIndex.size();
+      if (GetCNodeTType(**iter) == schema::PrimitiveType_BNGrad) idx_num = 2;
       for (int i = 0; i < idx_num; i++) {
         iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status);
         if (status != RET_OK) {
@@ -155,14 +156,6 @@ STATUS FormatTransPass::DoNodeInoutFormatTrans(schema::MetaGraphT *graph) {
           return RET_ERROR;
         }
       }
-    } else if (IsContain(GetNhwcDualInputOpList(), GetCNodeTType(**iter))) {
-      for (int i = 0; i < 2; i++) {
-        iter = InsertFormatTransNode(graph, iter, kBefore, i, beforeNodeType, &status);
-        if (status != RET_OK) {
-          MS_LOG(ERROR) << "InsertNchw2NhwcNode before " << nodeName << "failed";
-          return RET_ERROR;
-        }
-      }
     } else {
       int idx = 0;
      if (GetCNodeTType(**iter) == schema::PrimitiveType_ApplyMomentum) idx = 3;
diff --git a/mindspore/lite/tools/net_train/CMakeLists.txt b/mindspore/lite/tools/net_train/CMakeLists.txt
new file mode 100644
index 0000000000..566b9bf705
--- /dev/null
+++ b/mindspore/lite/tools/net_train/CMakeLists.txt
@@ -0,0 +1,38 @@
+# add shared link library
+set(COMMON_SRC
+    ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/file_utils.cc
+    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
+    )
+
+add_executable(net_train
+               ${CMAKE_CURRENT_SOURCE_DIR}/main.cc
+               ${CMAKE_CURRENT_SOURCE_DIR}/net_train.cc
+               ${COMMON_SRC})
+if (WIN32)
+    add_dependencies(net_train fbs_src mindspore-lite_static)
+else ()
+    add_dependencies(net_train fbs_src)
+endif ()
+
+if (PLATFORM_ARM32 OR PLATFORM_ARM64)
+    target_link_libraries(net_train mindspore-lite)
+else()
+    if (WIN32)
+        target_link_libraries(net_train mindspore-lite_static pthread cpu_kernel_mid nnacl_mid)
+    else ()
+        target_link_libraries(net_train mindspore-lite pthread)
+    endif ()
+endif()
+if (PLATFORM_ARM32 OR PLATFORM_ARM64)
+    install(TARGETS net_train
+            RUNTIME DESTINATION ${MAIN_DIR}-${COMPONENT_NAME}/net_train COMPONENT ${COMPONENT_NAME})
+else()
+    if (WIN32)
+        install(TARGETS net_train
+                RUNTIME DESTINATION ${MAIN_DIR}-${WIN_RUN_X86_NAME}/net_train COMPONENT ${WIN_RUN_X86_NAME})
+    else ()
+        install(TARGETS net_train
+                RUNTIME DESTINATION ${MAIN_DIR}-${RUN_X86_COMPONENT_NAME}/net_train COMPONENT ${RUN_X86_COMPONENT_NAME})
+    endif ()
+endif()
diff --git a/mindspore/lite/tools/net_train/main.cc b/mindspore/lite/tools/net_train/main.cc
new file mode 100644
index 0000000000..cfde9d319f
--- /dev/null
+++ b/mindspore/lite/tools/net_train/main.cc
@@ -0,0 +1,27 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <malloc.h>
+#include "tools/net_train/net_train.h"
+#include "include/version.h"
+
+int main(int argc, const char **argv) {
+  MS_LOG(INFO) << mindspore::lite::Version();
+  int res = mindspore::lite::RunNetTrain(argc, argv);
+  struct mallinfo info = mallinfo();
+  std::cout << "total allocation: " << info.arena << "\n";
+  return res;
+}
diff --git a/mindspore/lite/tools/net_train/net_train.cc b/mindspore/lite/tools/net_train/net_train.cc
new file mode 100644
index 0000000000..c4e53eb039
--- /dev/null
+++ b/mindspore/lite/tools/net_train/net_train.cc
@@ -0,0 +1,780 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tools/net_train/net_train.h"
+#define __STDC_FORMAT_MACROS
+#include <cinttypes>
+#undef __STDC_FORMAT_MACROS
+#include <algorithm>
+#include <cstring>
+#include "src/common/common.h"
+#include "include/ms_tensor.h"
+#include "include/context.h"
+#include "src/runtime/runtime_api.h"
+#include "include/version.h"
+
+namespace mindspore {
+namespace lite {
+static const char *DELIM_COLON = ":";
+static const char *DELIM_COMMA = ",";
+static const char *DELIM_SLASH = "/";
+
+void SaveFile(std::string path, void *buf, size_t size) {
+  std::ofstream ofs(path);
+  assert(true == ofs.good());
+  assert(true == ofs.is_open());
+
+  ofs.seekp(0, std::ios::beg);
+  ofs.write((const char *)buf, size);
+  ofs.close();
+}
+
+int NetTrain::GenerateRandomData(size_t size, void *data) {
+  MS_ASSERT(data != nullptr);
+  char *casted_data = static_cast<char *>(data);
+  for (size_t i = 0; i < size; i++) {
+    casted_data[i] = static_cast<char>(i);
+  }
+  return RET_OK;
+}
+
+int NetTrain::GenerateInputData() {
+  for (auto tensor : ms_inputs_) {
+    MS_ASSERT(tensor != nullptr);
+    auto input_data = tensor->MutableData();
+    if (input_data == nullptr) {
+      MS_LOG(ERROR) << "MallocData for inTensor failed";
+      return RET_ERROR;
+    }
+    auto tensor_byte_size = tensor->Size();
+    auto status = GenerateRandomData(tensor_byte_size, input_data);
+    if (status != 0) {
+      std::cerr << "GenerateRandomData for inTensor failed: " << status << std::endl;
+      MS_LOG(ERROR) << "GenerateRandomData for inTensor failed:" << status;
+      return status;
+    }
+  }
+  return RET_OK;
+}
+
+int NetTrain::LoadInput() {
+  if (flags_->in_data_file_.empty()) {
+    auto status = GenerateInputData();
+    if (status != 0) {
+      std::cerr << "Generate input data error " << status << std::endl;
+      MS_LOG(ERROR) << "Generate input data error " << status;
+      return status;
+    }
+  } else {
+    auto status = ReadInputFile();
+    if (status != 0) {
+      std::cerr << "ReadInputFile error, " << status << std::endl;
+      MS_LOG(ERROR) << "ReadInputFile error, " << status;
+      return status;
+    }
+  }
+  return RET_OK;
+}
+
+int NetTrain::ReadInputFile() {
+  if (ms_inputs_.empty()) {
+    return RET_OK;
+  }
+
+  if (this->flags_->in_data_type_ == kImage) {
+    MS_LOG(ERROR) << "Not supported image input";
+    return RET_ERROR;
+  } else {
+    for (size_t i = 0; i < flags_->input_data_list_.size(); i++) {
+      auto cur_tensor = ms_inputs_.at(i);
+      MS_ASSERT(cur_tensor != nullptr);
+      size_t size;
+      char *bin_buf = ReadFile(flags_->input_data_list_[i].c_str(), &size);
+      if (bin_buf == nullptr) {
+        MS_LOG(ERROR) << "ReadFile return nullptr";
+        return RET_ERROR;
+      }
+      auto tensor_data_size = cur_tensor->Size();
+      if (size != tensor_data_size) {
+        std::cerr << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size
+                  << std::endl;
+        MS_LOG(ERROR) << "Input binary file size error, required: " << tensor_data_size << ", in fact: " << size;
+        delete[] bin_buf;
+        return RET_ERROR;
+      }
+      auto input_data = cur_tensor->MutableData();
+      memcpy(input_data, bin_buf, tensor_data_size);
+      delete[] bin_buf;
+    }
+  }
+  return RET_OK;
+}
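A note on the input format: ReadInputFile above does a strict byte-size comparison against tensor->Size(), so each file named in --inDataFile must hold exactly the raw payload of the matching input tensor. A minimal sketch of that size calculation, assuming a float32 tensor (the helper name and shape parameter are illustrative, not part of the patch):

#include <cstddef>
#include <vector>

// Bytes a raw float32 input file must contain so the size check
// in ReadInputFile passes; must equal tensor->Size().
size_t ExpectedFileBytes(const std::vector<int> &shape) {
  size_t elements = 1;
  for (int d : shape) elements *= static_cast<size_t>(d);
  return elements * sizeof(float);
}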
+
+// calibData is FP32
+int NetTrain::ReadCalibData() {
+  const char *calib_data_path = flags_->data_file_.c_str();
+  // read calib data
+  std::ifstream in_file(calib_data_path);
+  if (!in_file.good()) {
+    std::cerr << "file: " << calib_data_path << " does not exist" << std::endl;
+    MS_LOG(ERROR) << "file: " << calib_data_path << " does not exist";
+    return RET_ERROR;
+  }
+
+  if (!in_file.is_open()) {
+    std::cerr << "file: " << calib_data_path << " open failed" << std::endl;
+    MS_LOG(ERROR) << "file: " << calib_data_path << " open failed";
+    in_file.close();
+    return RET_ERROR;
+  }
+
+  std::string line;
+
+  MS_LOG(INFO) << "Start reading calibData file";
+  std::string tensor_name;
+  while (!in_file.eof()) {
+    getline(in_file, line);
+    std::stringstream string_line1(line);
+    size_t dim = 0;
+    string_line1 >> tensor_name >> dim;
+    std::vector<size_t> dims;
+    size_t shape_size = 1;
+    for (size_t i = 0; i < dim; i++) {
+      size_t tmp_dim;
+      string_line1 >> tmp_dim;
+      dims.push_back(tmp_dim);
+      shape_size *= tmp_dim;
+    }
+
+    getline(in_file, line);
+    std::stringstream string_line2(line);
+    std::vector<float> tensor_data;
+    for (size_t i = 0; i < shape_size; i++) {
+      float tmp_data;
+      string_line2 >> tmp_data;
+      tensor_data.push_back(tmp_data);
+    }
+    auto *check_tensor = new CheckTensor(dims, tensor_data);
+    this->data_.insert(std::make_pair(tensor_name, check_tensor));
+  }
+  in_file.close();
+  MS_LOG(INFO) << "Finish reading calibData file";
+  return RET_OK;
+}
+
+int NetTrain::CompareOutput() {
+  std::cout << "================ Comparing Output data ================" << std::endl;
+  float total_bias = 0;
+  int total_size = 0;
+  bool has_error = false;
+
+  for (const auto &calib_tensor : data_) {
+    std::string node_or_tensor_name = calib_tensor.first;
+    auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
+    mindspore::tensor::MSTensor *tensor = nullptr;
+    if (tensors.empty() || tensors.size() != 1) {
+      MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
+                   << " or node has more than one output tensor, switch to GetOutputByTensorName";
+      tensor = session_->GetOutputByTensorName(node_or_tensor_name);
+      if (tensor == nullptr) {
+        MS_LOG(ERROR) << "Cannot find output tensor " << node_or_tensor_name << ", get model output failed";
+        return RET_ERROR;
+      }
+    } else {
+      tensor = tensors.front();
+    }
+    MS_ASSERT(tensor->MutableData() != nullptr);
+    auto outputs = tensor->MutableData();
+    float bias = CompareData(node_or_tensor_name, tensor->shape(), reinterpret_cast<float *>(outputs));
+
+    if (bias >= 0) {
+      total_bias += bias;
+      total_size++;
+    } else {
+      has_error = true;
+      break;
+    }
+  }
+
+  if (!has_error) {
+    float mean_bias;
+    if (total_size != 0) {
+      mean_bias = total_bias / total_size * 100;
+    } else {
+      mean_bias = 0;
+    }
+
+    std::cout << "Mean bias of all nodes/tensors: " << mean_bias << "%" << std::endl;
+    std::cout << "=======================================================" << std::endl << std::endl;
+
+    if (mean_bias > this->flags_->accuracy_threshold_) {
+      MS_LOG(ERROR) << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%";
+      std::cerr << "Mean bias of all nodes/tensors is too big: " << mean_bias << "%" << std::endl;
+      return RET_ERROR;
+    } else {
+      return RET_OK;
+    }
+  } else {
+    MS_LOG(ERROR) << "Error in CompareData";
+    std::cerr << "Error in CompareData" << std::endl;
+    std::cout << "=======================================================" << std::endl << std::endl;
+    return RET_ERROR;
+  }
+}
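For reference, ReadCalibData above expects --expectedDataFile to hold, for each tensor, one header line (tensor name, number of dimensions, then each dimension) followed by one line of space-separated float values. A hypothetical two-tensor file (names and values are illustrative only) would look like:

Softmax-op107 2 1 10
0.02 0.01 0.04 0.08 0.05 0.40 0.10 0.16 0.05 0.09
fc1.bias_grad 1 10
0.001 -0.002 0.000 0.004 -0.001 0.003 0.000 -0.005 0.002 0.001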
+
+int NetTrain::MarkPerformance() {
+  MS_LOG(INFO) << "Running train loops...";
+  std::cout << "Running train loops..." << std::endl;
+  uint64_t time_min = 1000000;
+  uint64_t time_max = 0;
+  uint64_t time_avg = 0;
+
+  for (int i = 0; i < flags_->epochs_; i++) {
+    session_->BindThread(true);
+    auto start = GetTimeUs();
+    auto status =
+      flags_->time_profiling_ ? session_->RunGraph(before_call_back_, after_call_back_) : session_->RunGraph();
+    if (status != 0) {
+      MS_LOG(ERROR) << "Inference error " << status;
+      std::cerr << "Inference error " << status;
+      return status;
+    }
+
+    auto end = GetTimeUs();
+    auto time = end - start;
+    time_min = std::min(time_min, time);
+    time_max = std::max(time_max, time);
+    time_avg += time;
+    session_->BindThread(false);
+  }
+
+  if (flags_->time_profiling_) {
+    const std::vector<std::string> per_op_name = {"opName", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
+    const std::vector<std::string> per_op_type = {"opType", "avg(ms)", "percent", "calledTimes", "opTotalTime"};
+    PrintResult(per_op_name, op_times_by_name_);
+    PrintResult(per_op_type, op_times_by_type_);
+  }
+
+  if (flags_->epochs_ > 0) {
+    time_avg /= flags_->epochs_;
+    MS_LOG(INFO) << "Model = " << flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
+                 << ", NumThreads = " << flags_->num_threads_ << ", MinRunTime = " << time_min / 1000.0f
+                 << ", MaxRuntime = " << time_max / 1000.0f << ", AvgRunTime = " << time_avg / 1000.0f;
+    printf("Model = %s, NumThreads = %d, MinRunTime = %f ms, MaxRuntime = %f ms, AvgRunTime = %f ms\n",
+           flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1).c_str(), flags_->num_threads_,
+           time_min / 1000.0f, time_max / 1000.0f, time_avg / 1000.0f);
+  }
+  return RET_OK;
+}
+
+int NetTrain::MarkAccuracy() {
+  MS_LOG(INFO) << "MarkAccuracy";
+  std::cout << "MarkAccuracy" << std::endl;
+  for (auto &msInput : ms_inputs_) {
+    switch (msInput->data_type()) {
+      case TypeId::kNumberTypeFloat:
+        PrintInputData<float>(msInput);
+        break;
+      case TypeId::kNumberTypeFloat32:
+        PrintInputData<float>(msInput);
+        break;
+      case TypeId::kNumberTypeInt32:
+        PrintInputData<int>(msInput);
+        break;
+      default:
+        MS_LOG(ERROR) << "Datatype " << msInput->data_type() << " is not supported.";
+        return RET_ERROR;
+    }
+  }
+  session_->Eval();
+
+  auto status = session_->RunGraph();
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Inference error " << status;
+    std::cerr << "Inference error " << status << std::endl;
+    return status;
+  }
+
+  status = ReadCalibData();
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Read calib data error " << status;
+    std::cerr << "Read calib data error " << status << std::endl;
+    return status;
+  }
+
+  status = CompareOutput();
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "Compare output error " << status;
+    std::cerr << "Compare output error " << status << std::endl;
+    return status;
+  }
+  return RET_OK;
+}
+
+int NetTrain::RunExportedNet() {
+  auto start_prepare_time = GetTimeUs();
+  // Load graph
+  std::string model_name = flags_->export_file_.substr(flags_->export_file_.find_last_of(DELIM_SLASH) + 1);
+
+  MS_LOG(INFO) << "start reading exported model file";
+  std::cout << "start reading exported model file" << std::endl;
+  size_t size = 0;
+  char *graph_buf = ReadFile(flags_->export_file_.c_str(), &size);
+  if (graph_buf == nullptr) {
+    MS_LOG(ERROR) << "Read exported model file failed while running " << model_name.c_str();
+    std::cerr << "Read exported model file failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto model = lite::TrainModel::Import(graph_buf, size);
+  delete[] graph_buf;
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "Import exported model file failed while running " << model_name.c_str();
+    std::cerr << "Import exported model file failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto context = std::make_shared<Context>();
+  if (context == nullptr) {
+    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
+    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->cpu_bind_mode_ == 2) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
+  } else if (flags_->cpu_bind_mode_ == 1) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
+  } else {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
+  }
+
+  context->thread_num_ = flags_->num_threads_;
+  // context->enable_float16_ = flags_->enable_fp16_;
+  session_ = session::TrainSession::CreateSession(context.get());
+  if (session_ == nullptr) {
+    MS_LOG(ERROR) << "CreateSession failed while running " << model_name.c_str();
+    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto ret = session_->CompileTrainGraph(model);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "CompileGraph failed while running " << model_name.c_str();
+    std::cout << "CompileGraph failed while running " << model_name.c_str() << std::endl;
+    return ret;
+  }
+
+  ms_inputs_ = session_->GetInputs();
+  auto end_prepare_time = GetTimeUs();
+  MS_LOG(INFO) << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
+  std::cout << "Exported model PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
+
+  // Load input
+  MS_LOG(INFO) << "start generate input data";
+  auto status = LoadInput();
+  if (status != 0) {
+    MS_LOG(ERROR) << "Generate input data error";
+    return status;
+  }
+
+  status = session_->RunGraph();
+  if (status != 0) {
+    MS_LOG(ERROR) << "Inference error " << status;
+    std::cerr << "Inference error " << status << std::endl;
+    return status;
+  }
+
+  if (!flags_->data_file_.empty()) {
+    MS_LOG(INFO) << "Check accuracy for exported model";
+    std::cout << "Check accuracy for exported model " << std::endl;
+    status = MarkAccuracy();
+    for (auto &data : data_) {
+      data.second->shape.clear();
+      data.second->data.clear();
+      delete data.second;
+    }
+    data_.clear();
+    if (status != 0) {
+      MS_LOG(ERROR) << "Run MarkAccuracy on exported model error: " << status;
+      std::cout << "Run MarkAccuracy on exported model error: " << status << std::endl;
+      return status;
+    }
+  }
+  return RET_OK;
+}
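The export path used below relies on ExportToBuf's allocate-on-nullptr contract: pass nullptr to have the session allocate a buffer, or pass a pre-allocated buffer whose capacity is given in len. A hedged sketch of both call patterns (error handling elided; the session variable is assumed to be a compiled TrainSession):

// Sketch only, not part of the patch.
size_t len = 0;
// 1) Let the session allocate; len returns the exported size.
char *buf = reinterpret_cast<char *>(session->ExportToBuf(nullptr, &len));
// 2) Reuse caller-owned storage; len carries capacity in, exported size out.
std::vector<char> storage(len);
session->ExportToBuf(storage.data(), &len);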
+
+int NetTrain::RunNetTrain() {
+  auto start_prepare_time = GetTimeUs();
+  // Load graph
+  std::string model_name = flags_->model_file_.substr(flags_->model_file_.find_last_of(DELIM_SLASH) + 1);
+
+  MS_LOG(INFO) << "start reading model file";
+  std::cout << "start reading model file" << std::endl;
+  size_t size = 0;
+  char *graph_buf = ReadFile(flags_->model_file_.c_str(), &size);
+  if (graph_buf == nullptr) {
+    MS_LOG(ERROR) << "Read model file failed while running " << model_name.c_str();
+    std::cerr << "Read model file failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto model = lite::TrainModel::Import(graph_buf, size);
+  delete[] graph_buf;
+  if (model == nullptr) {
+    MS_LOG(ERROR) << "Import model file failed while running " << model_name.c_str();
+    std::cerr << "Import model file failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto context = std::make_shared<Context>();
+  if (context == nullptr) {
+    MS_LOG(ERROR) << "New context failed while running " << model_name.c_str();
+    std::cerr << "New context failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->cpu_bind_mode_ == 2) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = MID_CPU;
+  } else if (flags_->cpu_bind_mode_ == 1) {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = HIGHER_CPU;
+  } else {
+    context->device_list_[0].device_info_.cpu_device_info_.cpu_bind_mode_ = NO_BIND;
+  }
+  context->thread_num_ = flags_->num_threads_;
+  // context->enable_float16_ = flags_->enable_fp16_;
+  session_ = session::TrainSession::CreateSession(context.get());
+  if (session_ == nullptr) {
+    MS_LOG(ERROR) << "CreateSession failed while running " << model_name.c_str();
+    std::cout << "CreateSession failed while running " << model_name.c_str() << std::endl;
+    return RET_ERROR;
+  }
+  auto ret = session_->CompileTrainGraph(model);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "CompileGraph failed while running " << model_name.c_str();
+    std::cout << "CompileGraph failed while running " << model_name.c_str() << std::endl;
+    return ret;
+  }
+
+  session_->Train();
+
+  ms_inputs_ = session_->GetInputs();
+  auto end_prepare_time = GetTimeUs();
+  MS_LOG(INFO) << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms";
+  std::cout << "PrepareTime = " << (end_prepare_time - start_prepare_time) / 1000 << " ms" << std::endl;
+
+  // Load input
+  MS_LOG(INFO) << "start generate input data";
+  auto status = LoadInput();
+  if (status != 0) {
+    MS_LOG(ERROR) << "Generate input data error";
+    return status;
+  }
+  if (flags_->epochs_ > 0) {
+    status = MarkPerformance();
+    if (status != 0) {
+      MS_LOG(ERROR) << "Run MarkPerformance error: " << status;
+      std::cout << "Run MarkPerformance error: " << status << std::endl;
+      return status;
+    }
+  }
+  if (!flags_->data_file_.empty()) {
+    status = MarkAccuracy();
+    for (auto &data : data_) {
+      data.second->shape.clear();
+      data.second->data.clear();
+      delete data.second;
+    }
+    data_.clear();
+    if (status != 0) {
+      MS_LOG(ERROR) << "Run MarkAccuracy error: " << status;
+      std::cout << "Run MarkAccuracy error: " << status << std::endl;
+      return status;
+    }
+  }
+  if (!flags_->export_file_.empty()) {
+    size_t tsize = 0;
+    auto buf = session_->ExportToBuf(nullptr, &tsize);
+    if (buf == nullptr) {
+      MS_LOG(ERROR) << "Run ExportToBuf error";
+      std::cout << "Run ExportToBuf error";
+      return RET_ERROR;
+    }
+    SaveFile(flags_->export_file_, buf, tsize);
+
+    status = RunExportedNet();
+    if (status != 0) {
+      MS_LOG(ERROR) << "Run Exported model error: " << status;
+      std::cout << "Run Exported model error: " << status << std::endl;
+      return status;
+    }
+  }
+  return RET_OK;
+}
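RunNetTrain above exercises the whole train-session API end to end. A stripped-down sketch of the same flow, with error checks omitted and the model path purely illustrative:

// Hedged sketch of the flow RunNetTrain implements; not a drop-in program.
size_t size = 0;
char *graph_buf = mindspore::lite::ReadFile("model.ms", &size);
auto *model = mindspore::lite::TrainModel::Import(graph_buf, size);
auto context = std::make_shared<mindspore::lite::Context>();
auto *session = mindspore::session::TrainSession::CreateSession(context.get());
session->CompileTrainGraph(model);
session->Train();                    // switch to train mode
// ... fill the tensors returned by session->GetInputs() ...
session->RunGraph();                 // one training step
session->Eval();                     // switch to eval mode for accuracy checks
size_t len = 0;
void *buf = session->ExportToBuf(nullptr, &len);  // serialize the trained model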
+
+void NetTrainFlags::InitInputDataList() {
+  char *saveptr1;
+  char *input_list = new char[this->in_data_file_.length() + 1];
+  snprintf(input_list, this->in_data_file_.length() + 1, "%s", this->in_data_file_.c_str());
+  char *cur_input;
+  const char *split_c = ",";
+  cur_input = strtok_r(input_list, split_c, &saveptr1);
+  while (cur_input != nullptr) {
+    input_data_list_.emplace_back(cur_input);
+    cur_input = strtok_r(nullptr, split_c, &saveptr1);
+  }
+  delete[] input_list;
+}
+
+void NetTrainFlags::InitResizeDimsList() {
+  std::string content;
+  content = this->resize_dims_in_;
+  std::vector<int64_t> shape;
+  auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
+  for (const auto &shape_str : shape_strs) {
+    shape.clear();
+    auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
+    std::cout << "Resize Dims: ";
+    for (const auto &dim_str : dim_strs) {
+      std::cout << dim_str << " ";
+      shape.emplace_back(static_cast<int64_t>(std::stoi(dim_str)));
+    }
+    std::cout << std::endl;
+    this->resize_dims_.emplace_back(shape);
+  }
+}
+
+int NetTrain::InitCallbackParameter() {
+  // before callback
+  before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
+                          const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
+                          const mindspore::CallBackParam &callParam) {
+    if (before_inputs.empty()) {
+      MS_LOG(INFO) << "The num of beforeInputs is empty";
+    }
+    if (before_outputs.empty()) {
+      MS_LOG(INFO) << "The num of beforeOutputs is empty";
+    }
+    if (op_times_by_type_.find(callParam.node_type) == op_times_by_type_.end()) {
+      op_times_by_type_.insert(std::make_pair(callParam.node_type, std::make_pair(0, 0.0f)));
+    }
+    if (op_times_by_name_.find(callParam.node_name) == op_times_by_name_.end()) {
+      op_times_by_name_.insert(std::make_pair(callParam.node_name, std::make_pair(0, 0.0f)));
+    }
+
+    op_call_times_total_++;
+    op_begin_ = GetTimeUs();
+    return true;
+  };
+
+  // after callback
+  after_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
+                         const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
+                         const mindspore::CallBackParam &call_param) {
+    uint64_t opEnd = GetTimeUs();
+
+    if (after_inputs.empty()) {
+      MS_LOG(INFO) << "The num of after inputs is empty";
+    }
+    if (after_outputs.empty()) {
+      MS_LOG(INFO) << "The num of after outputs is empty";
+    }
+
+    float cost = static_cast<float>(opEnd - op_begin_) / 1000.0f;
+    op_cost_total_ += cost;
+    op_times_by_type_[call_param.node_type].first++;
+    op_times_by_type_[call_param.node_type].second += cost;
+    op_times_by_name_[call_param.node_name].first++;
+    op_times_by_name_[call_param.node_name].second += cost;
+    return true;
+  };
+
+  return RET_OK;
+}
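The two lambdas registered above implement per-op timing, but the same before/after hook pair can carry any per-node instrumentation. A minimal hedged example of a custom callback pair (the print format is illustrative; GetTimeUs is assumed to be the same timer helper the tool uses):

// Sketch: print each kernel's name and wall time; pass to RunGraph(before, after).
uint64_t begin_us = 0;
mindspore::KernelCallBack before = [&](const std::vector<mindspore::tensor::MSTensor *> &,
                                       const std::vector<mindspore::tensor::MSTensor *> &,
                                       const mindspore::CallBackParam &param) {
  begin_us = GetTimeUs();
  return true;  // returning false aborts the run
};
mindspore::KernelCallBack after = [&](const std::vector<mindspore::tensor::MSTensor *> &,
                                      const std::vector<mindspore::tensor::MSTensor *> &,
                                      const mindspore::CallBackParam &param) {
  std::cout << param.node_name << ": " << (GetTimeUs() - begin_us) / 1000.0f << " ms\n";
  return true;
};
// session->RunGraph(before, after);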
+
+int NetTrain::Init() {
+  if (this->flags_ == nullptr) {
+    return 1;
+  }
+  MS_LOG(INFO) << "ModelPath = " << this->flags_->model_file_;
+  MS_LOG(INFO) << "InDataPath = " << this->flags_->in_data_file_;
+  MS_LOG(INFO) << "InDataType = " << this->flags_->in_data_type_in_;
+  MS_LOG(INFO) << "Epochs = " << this->flags_->epochs_;
+  MS_LOG(INFO) << "AccuracyThreshold = " << this->flags_->accuracy_threshold_;
+  MS_LOG(INFO) << "WarmUpLoopCount = " << this->flags_->warm_up_loop_count_;
+  MS_LOG(INFO) << "NumThreads = " << this->flags_->num_threads_;
+  MS_LOG(INFO) << "expectedDataFile = " << this->flags_->data_file_;
+  MS_LOG(INFO) << "exportDataFile = " << this->flags_->export_file_;
+
+  if (this->flags_->epochs_ < 0) {
+    MS_LOG(ERROR) << "epochs:" << this->flags_->epochs_ << " must be greater than or equal to 0";
+    std::cerr << "epochs:" << this->flags_->epochs_ << " must be greater than or equal to 0" << std::endl;
+    return RET_ERROR;
+  }
+
+  if (this->flags_->num_threads_ < 1) {
+    MS_LOG(ERROR) << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0";
+    std::cerr << "numThreads:" << this->flags_->num_threads_ << " must be greater than 0" << std::endl;
+    return RET_ERROR;
+  }
+
+  this->flags_->in_data_type_ = this->flags_->in_data_type_in_ == "img" ? kImage : kBinary;
+
+  if (flags_->in_data_file_.empty() && !flags_->data_file_.empty()) {
+    MS_LOG(ERROR) << "expectedDataFile is not supported in case that inDataFile is not provided";
+    std::cerr << "expectedDataFile is not supported in case that inDataFile is not provided" << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->in_data_file_.empty() && !flags_->export_file_.empty()) {
+    MS_LOG(ERROR) << "exportDataFile is not supported in case that inDataFile is not provided";
+    std::cerr << "exportDataFile is not supported in case that inDataFile is not provided" << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->model_file_.empty()) {
+    MS_LOG(ERROR) << "modelPath is required";
+    std::cerr << "modelPath is required" << std::endl;
+    return 1;
+  }
+  flags_->InitInputDataList();
+  flags_->InitResizeDimsList();
+  if (!flags_->resize_dims_.empty() && flags_->resize_dims_.size() != flags_->input_data_list_.size()) {
+    MS_LOG(ERROR) << "Size of input resizeDims should be equal to size of input inDataPath";
+    std::cerr << "Size of input resizeDims should be equal to size of input inDataPath" << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags_->time_profiling_) {
+    auto status = InitCallbackParameter();
+    if (status != RET_OK) {
+      MS_LOG(ERROR) << "Init callback Parameter failed.";
+      std::cerr << "Init callback Parameter failed." << std::endl;
+      return RET_ERROR;
+    }
+  }
+
+  return RET_OK;
+}
+
+int NetTrain::PrintResult(const std::vector<std::string> &title,
+                          const std::map<std::string, std::pair<int, float>> &result) {
+  std::vector<size_t> columnLenMax(5);
+  std::vector<std::vector<std::string>> rows;
+
+  for (auto &iter : result) {
+    char stringBuf[5][100] = {};
+    std::vector<std::string> columns;
+    size_t len;
+
+    len = iter.first.size();
+    if (len > columnLenMax.at(0)) {
+      columnLenMax.at(0) = len + 4;
+    }
+    columns.push_back(iter.first);
+
+    len = snprintf(stringBuf[1], sizeof(stringBuf[1]), "%f", iter.second.second / flags_->epochs_);
+    if (len > columnLenMax.at(1)) {
+      columnLenMax.at(1) = len + 4;
+    }
+    columns.emplace_back(stringBuf[1]);
+
+    len = snprintf(stringBuf[2], sizeof(stringBuf[2]), "%f", iter.second.second / op_cost_total_);
+    if (len > columnLenMax.at(2)) {
+      columnLenMax.at(2) = len + 4;
+    }
+    columns.emplace_back(stringBuf[2]);
+
+    len = snprintf(stringBuf[3], sizeof(stringBuf[3]), "%d", iter.second.first);
+    if (len > columnLenMax.at(3)) {
+      columnLenMax.at(3) = len + 4;
+    }
+    columns.emplace_back(stringBuf[3]);
+
+    len = snprintf(stringBuf[4], sizeof(stringBuf[4]), "%f", iter.second.second);
+    if (len > columnLenMax.at(4)) {
+      columnLenMax.at(4) = len + 4;
+    }
+    columns.emplace_back(stringBuf[4]);
+
+    rows.push_back(columns);
+  }
+
+  printf("-------------------------------------------------------------------------\n");
+  for (int i = 0; i < 5; i++) {
+    auto printBuf = title[i];
+    if (printBuf.size() > columnLenMax.at(i)) {
+      columnLenMax.at(i) = printBuf.size();
+    }
+    printBuf.resize(columnLenMax.at(i), ' ');
+    printf("%s\t", printBuf.c_str());
+  }
+  printf("\n");
+  for (size_t i = 0; i < rows.size(); i++) {
+    for (int j = 0; j < 5; j++) {
+      auto printBuf = rows[i][j];
+      printBuf.resize(columnLenMax.at(j), ' ');
+      printf("%s\t", printBuf.c_str());
+    }
+    printf("\n");
+  }
+  return RET_OK;
+}
+
+NetTrain::~NetTrain() {
+  for (auto iter : this->data_) {
+    delete (iter.second);
+  }
+  this->data_.clear();
+  delete (session_);
+}
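With the flags validated in Init() above, a typical invocation of the tool (paths and values are illustrative) is:

./net_train --modelFile=lenet_train.ms --inDataFile=batch_x.bin --expectedDataFile=expected.out --epochs=10 --exportFile=lenet_trained.ms --accuracyThreshold=0.5

This trains for ten epochs, compares the outputs against the calibration file, then exports the trained model and re-verifies it through RunExportedNet.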
+
+int RunNetTrain(int argc, const char **argv) {
+  NetTrainFlags flags;
+  Option<std::string> err = flags.ParseFlags(argc, argv);
+
+  if (err.IsSome()) {
+    std::cerr << err.Get() << std::endl;
+    std::cerr << flags.Usage() << std::endl;
+    return RET_ERROR;
+  }
+
+  if (flags.help) {
+    std::cerr << flags.Usage() << std::endl;
+    return RET_OK;
+  }
+
+  NetTrain net_trainer(&flags);
+  auto status = net_trainer.Init();
+  if (status != 0) {
+    MS_LOG(ERROR) << "NetTrain init Error : " << status;
+    std::cerr << "NetTrain init Error : " << status << std::endl;
+    return RET_ERROR;
+  }
+
+  status = net_trainer.RunNetTrain();
+  if (status != 0) {
+    MS_LOG(ERROR) << "Run NetTrain "
+                  << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
+                  << " Failed : " << status;
+    std::cerr << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
+              << " Failed : " << status << std::endl;
+    return RET_ERROR;
+  }
+
+  MS_LOG(INFO) << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
+               << " Success.";
+  std::cout << "Run NetTrain " << flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1).c_str()
+            << " Success." << std::endl;
+  return RET_OK;
+}
+}  // namespace lite
+}  // namespace mindspore
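For the record, the accuracy statistic the tool reports (pieced together from CompareOutput above and CompareData in the header below) is, approximately:

\[
\lvert x_j - c_j \rvert > \text{atol} + \text{rtol}\,\lvert c_j \rvert, \qquad \text{atol} = 10^{-8},\ \ \text{rtol} = 10^{-5}
\]
\[
\text{bias} = \frac{1}{\lvert E \rvert} \sum_{j \in E} \frac{\lvert x_j - c_j \rvert}{\lvert c_j \rvert + \texttt{FLT\_MIN}}, \qquad \text{meanBias} = \frac{100}{N} \sum_{k=1}^{N} \text{bias}_k \ \%
\]

where \(x\) is the model output, \(c\) the calibration value, \(E\) the set of elements failing the tolerance test, and \(N\) the number of checked tensors; the run fails when meanBias exceeds --accuracyThreshold. (The code additionally skips elements whose absolute error is below 1e-5 and falls back to the absolute error when \(c_j = 0\).)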
diff --git a/mindspore/lite/tools/net_train/net_train.h b/mindspore/lite/tools/net_train/net_train.h
new file mode 100644
index 0000000000..df600a06ac
--- /dev/null
+++ b/mindspore/lite/tools/net_train/net_train.h
@@ -0,0 +1,243 @@
+/**
+ * Copyright 2020 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_TOOLS_NET_TRAIN_NET_TRAIN_H_
+#define MINDSPORE_LITE_TOOLS_NET_TRAIN_NET_TRAIN_H_
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "include/train_model.h"
+#include "tools/common/flag_parser.h"
+#include "src/common/file_utils.h"
+#include "src/common/utils.h"
+#include "include/train_session.h"
+
+namespace mindspore::lite {
+enum MS_API DataType { kImage = 0, kBinary = 1 };
+
+constexpr float relativeTolerance = 1e-5;
+constexpr float absoluteTolerance = 1e-8;
+
+struct MS_API CheckTensor {
+  CheckTensor(const std::vector<size_t> &shape, const std::vector<float> &data) {
+    this->shape = shape;
+    this->data = data;
+  }
+  std::vector<size_t> shape;
+  std::vector<float> data;
+};
+
+class MS_API NetTrainFlags : public virtual FlagParser {
+ public:
+  NetTrainFlags() {
+    // common
+    AddFlag(&NetTrainFlags::model_file_, "modelFile", "Input model file", "");
+    AddFlag(&NetTrainFlags::in_data_file_, "inDataFile", "Input data file, if not set, use random input", "");
+    // MarkPerformance
+    AddFlag(&NetTrainFlags::warm_up_loop_count_, "warmUpLoopCount", "Run warm up loop", 0);
+    AddFlag(&NetTrainFlags::time_profiling_, "timeProfiling", "Run time profiling", false);
+    AddFlag(&NetTrainFlags::epochs_, "epochs", "Number of training epochs to run", 1);
+    // MarkAccuracy
+    AddFlag(&NetTrainFlags::data_file_, "expectedDataFile", "Expected results data file path", "");
+    AddFlag(&NetTrainFlags::export_file_, "exportFile", "MS File to export trained model into", "");
+    AddFlag(&NetTrainFlags::accuracy_threshold_, "accuracyThreshold", "Threshold of accuracy", 0.5);
+  }
+
+  ~NetTrainFlags() override = default;
+
+  void InitInputDataList();
+
+  void InitResizeDimsList();
+
+ public:
+  // common
+  std::string model_file_;
+  std::string in_data_file_;
+  std::vector<std::string> input_data_list_;
+  DataType in_data_type_;
+  std::string in_data_type_in_ = "bin";
+  int cpu_bind_mode_ = 0;
+  // MarkPerformance
+  int num_threads_ = 1;
+  int warm_up_loop_count_ = 0;
+  bool time_profiling_;
+  int epochs_ = 1;
+  // MarkAccuracy
+  std::string data_file_;
+  std::string data_type_ = "FLOAT";
+  float accuracy_threshold_;
+  // Resize
+  std::string export_file_ = "";
+  std::string resize_dims_in_ = "";
+  std::vector<std::vector<int64_t>> resize_dims_;
+};
+
+class MS_API NetTrain {
+ public:
+  explicit NetTrain(NetTrainFlags *flags) : flags_(flags) {}
+
+  virtual ~NetTrain();
+
+  int Init();
+  int RunNetTrain();
+  int RunExportedNet();
+
+ private:
+  // call GenerateInputData or ReadInputFile to init inputTensors
+  int LoadInput();
+
+  // call GenerateRandomData to fill inputTensors
+  int GenerateInputData();
+
+  int GenerateRandomData(size_t size, void *data);
+
+  int ReadInputFile();
+
+  int ReadCalibData();
+
+  int CompareOutput();
+
+  int InitCallbackParameter();
+
+  int PrintResult(const std::vector<std::string> &title, const std::map<std::string, std::pair<int, float>> &result);
+
+  template <typename T>
+  void PrintInputData(tensor::MSTensor *input) {
+    MS_ASSERT(input != nullptr);
+    static int i = 0;
+    auto inData = reinterpret_cast<T *>(input->MutableData());
+    std::cout << "InData" << i++ << ": ";
+    for (size_t j = 0; j < 20; j++) {
+      std::cout << inData[j] << " ";
+    }
+    std::cout << std::endl;
+  }
+
+  // tensorData needs to be converted first
+  template <typename T>
+  float CompareData(const std::string &nodeName, std::vector<int> msShape, T *msTensorData) {
+    auto iter = this->data_.find(nodeName);
+    if (iter != this->data_.end()) {
+      std::vector<size_t> castedMSShape;
+      size_t shapeSize = 1;
+      for (int64_t dim : msShape) {
+        castedMSShape.push_back(size_t(dim));
+        shapeSize *= dim;
+      }
+
+      CheckTensor *calibTensor = iter->second;
+      if (calibTensor->shape != castedMSShape) {
+        std::ostringstream oss;
+        oss << "Shape of mslite output(";
+        for (auto dim : castedMSShape) {
+          oss << dim << ",";
+        }
+        oss << ") and shape of source model output(";
+        for (auto dim : calibTensor->shape) {
+          oss << dim << ",";
+        }
+        oss << ") are different";
+        std::cerr << oss.str() << std::endl;
+        MS_LOG(ERROR) << oss.str().c_str();
+        return RET_ERROR;
+      }
+      size_t errorCount = 0;
+      float meanError = 0;
+      std::cout << "Data of node " << nodeName << " : ";
+      for (size_t j = 0; j < shapeSize; j++) {
+        if (j < 50) {
+          std::cout << static_cast<float>(msTensorData[j]) << " ";
+        }
+
+        if (std::isnan(msTensorData[j]) || std::isinf(msTensorData[j])) {
+          std::cerr << "Output tensor has nan or inf data, compare fail" << std::endl;
+          MS_LOG(ERROR) << "Output tensor has nan or inf data, compare fail";
+          return RET_ERROR;
+        }
+
+        auto tolerance = absoluteTolerance + relativeTolerance * fabs(calibTensor->data.at(j));
+        auto absoluteError = std::fabs(msTensorData[j] - calibTensor->data.at(j));
+        if (absoluteError > tolerance) {
+          if (fabs(calibTensor->data.at(j)) == 0) {
+            if (absoluteError > 1e-5) {
+              meanError += absoluteError;
+              errorCount++;
+            } else {
+              continue;
+            }
+          } else {
+            // just assume that atol = rtol
+            if (absoluteError > 1e-5) {
+              meanError += absoluteError / (fabs(calibTensor->data.at(j)) + FLT_MIN);
+              errorCount++;
+            }
+          }
+        }
+      }
+      std::cout << std::endl;
+      if (meanError > 0.0f) {
+        meanError /= errorCount;
+      }
+
+      if (meanError <= 0.0000001) {
+        std::cout << "Mean bias of node/tensor " << nodeName << " : 0%" << std::endl;
+      } else {
+        std::cout << "Mean bias of node/tensor " << nodeName << " : " << meanError * 100 << "%" << std::endl;
+      }
+      return meanError;
+    } else {
+      MS_LOG(INFO) << nodeName.c_str() << " is not in Source Model output";
+      return RET_ERROR;
+    }
+  }
+
+  int MarkPerformance();
+
+  int MarkAccuracy();
+
+ private:
+  NetTrainFlags *flags_;
+  session::TrainSession *session_;
+  std::vector<mindspore::tensor::MSTensor *> ms_inputs_;
+  std::unordered_map<std::string, std::vector<mindspore::tensor::MSTensor *>> ms_outputs_;
+  std::unordered_map<std::string, CheckTensor *> data_;
+  std::unordered_map<std::string, TypeId> data_type_map_{{"FLOAT", TypeId::kNumberTypeFloat},
+                                                         {"INT32", TypeId::kNumberTypeInt32}};
+
+  // callback parameters
+  uint64_t op_begin_ = 0;
+  int op_call_times_total_ = 0;
+  float op_cost_total_ = 0.0f;
+  std::map<std::string, std::pair<int, float>> op_times_by_type_;
+  std::map<std::string, std::pair<int, float>> op_times_by_name_;
+
+  mindspore::KernelCallBack before_call_back_;
+  mindspore::KernelCallBack after_call_back_;
+};
+
+int MS_API RunNetTrain(int argc, const char **argv);
+}  // namespace mindspore::lite
+#endif  // MINDSPORE_LITE_TOOLS_NET_TRAIN_NET_TRAIN_H_
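To produce an --expectedDataFile that the ReadCalibData/CompareData pair can consume, an exporter only has to emit the name/dims header line plus one line of float values per tensor. A hedged C++ sketch (the helper name is illustrative; how the tensor values are obtained is left to the caller):

#include <fstream>
#include <string>
#include <vector>

// Appends one tensor in the format ReadCalibData parses:
//   <name> <ndim> <d0> ... <dn-1>\n<v0> <v1> ... \n
void AppendCalibTensor(std::ofstream &ofs, const std::string &name,
                       const std::vector<size_t> &dims, const std::vector<float> &values) {
  ofs << name << " " << dims.size();
  for (size_t d : dims) ofs << " " << d;
  ofs << "\n";
  for (float v : values) ofs << v << " ";
  ofs << "\n";
}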