diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc
new file mode 100644
index 0000000000..b208ac9aa1
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc
@@ -0,0 +1,221 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cstdlib>
+#include <cstring>
+#include "src/runtime/kernel/arm/fp32/uniform_real_fp32.h"
+#include "src/kernel_registry.h"
+#include "include/errorcode.h"
+
+using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+using mindspore::schema::PrimitiveType_UniformReal;
+
+namespace mindspore::kernel {
+
+// Minimal fixed-size array whose elements are zero-initialized on construction.
+template <typename T, int ElementCount>
+class Array {
+ public:
+  Array() {
+    for (int i = 0; i < ElementCount; ++i) {
+      data_[i] = T(0);
+    }
+  }
+
+  const T &operator[](int index) const { return data_[index]; }
+
+  T &operator[](int index) { return data_[index]; }
+
+ private:
+  T data_[ElementCount];
+};
+
+// Counter-based generator: each call mixes a 128-bit counter with a 64-bit key over ten rounds,
+// returns the resulting four 32-bit values and then advances the counter by one.
+class PhiloxRandom {
+ public:
+  using ResultType = Array<uint32_t, 4>;
+  using Key = Array<uint32_t, 2>;
+
+  // seed_lo initializes the two key words; seed_hi initializes the upper two counter words.
+  explicit PhiloxRandom(uint64_t seed_lo, uint64_t seed_hi) {
+    key_[0] = static_cast<uint32_t>(seed_lo);
+    key_[1] = static_cast<uint32_t>(seed_lo >> 32);
+    counter_[2] = static_cast<uint32_t>(seed_hi);
+    counter_[3] = static_cast<uint32_t>(seed_hi >> 32);
+  }
+
+  // Skip the specified number of samples of 128-bits in the current stream.
+  void Skip(uint64_t count) {
+    const uint32_t count_lo = static_cast<uint32_t>(count);
+    uint32_t count_hi = static_cast<uint32_t>(count >> 32);
+
+    counter_[0] += count_lo;
+    if (counter_[0] < count_lo) {
+      ++count_hi;
+    }
+
+    counter_[1] += count_hi;
+    if (counter_[1] < count_hi) {
+      if (++counter_[2] == 0) {
+        ++counter_[3];
+      }
+    }
+  }
+
+  // Returns a group of four random numbers using the underlying Philox
+  // algorithm.
+  ResultType operator()() {
+    ResultType counter = counter_;
+    Key key = key_;
+
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+    RaiseKey(&key);
+    counter = ComputeSingleRound(counter, key);
+
+    SkipOne();
+
+    return counter;
+  }
+
+ private:
+  // We use the same constants as recommended by the original paper.
+  static constexpr uint32_t kPhiloxW32A = 0x9E3779B9;
+  static constexpr uint32_t kPhiloxW32B = 0xBB67AE85;
+  static constexpr uint32_t kPhiloxM4x32A = 0xD2511F53;
+  static constexpr uint32_t kPhiloxM4x32B = 0xCD9E8D57;
+
+  // Helper function to skip the next sample of 128-bits in the current stream.
+  void SkipOne() {
+    if (++counter_[0] == 0) {
+      if (++counter_[1] == 0) {
+        if (++counter_[2] == 0) {
+          ++counter_[3];
+        }
+      }
+    }
+  }
+
+  // Computes the 64-bit product a * b and returns its low and high 32-bit halves through the out-pointers.
+  static void MultiplyHighLow(uint32_t a, uint32_t b, uint32_t *result_low, uint32_t *result_high) {
+    const uint64_t product = static_cast<uint64_t>(a) * b;
+    *result_low = static_cast<uint32_t>(product);
+    *result_high = static_cast<uint32_t>(product >> 32);
+  }
+
+  // Helper function for a single round of the underlying Philox algorithm.
+  static ResultType ComputeSingleRound(const ResultType &counter, const Key &key) {
+    uint32_t lo0;
+    uint32_t hi0;
+    MultiplyHighLow(kPhiloxM4x32A, counter[0], &lo0, &hi0);
+
+    uint32_t lo1;
+    uint32_t hi1;
+    MultiplyHighLow(kPhiloxM4x32B, counter[2], &lo1, &hi1);
+
+    ResultType result;
+    result[0] = hi1 ^ counter[1] ^ key[0];
+    result[1] = lo1;
+    result[2] = hi0 ^ counter[3] ^ key[1];
+    result[3] = lo0;
+    return result;
+  }
+
+  // Bumps both key words by their per-round increments (kPhiloxW32A / kPhiloxW32B).
+  void RaiseKey(Key *key) {
+    (*key)[0] += kPhiloxW32A;
+    (*key)[1] += kPhiloxW32B;
+  }
+
+ private:
+  ResultType counter_;
+  Key key_;
+};
+
+// Converts 32 random bits to a float in [0, 1): the low 23 bits become the mantissa of a value in [1, 2)
+// (biased exponent 127, IEEE-754 single precision assumed), and 1.0f is then subtracted.
+float uint32ToFloat(uint32_t x) {
+  const uint32_t man = x & 0x7fffffu;  // 23 bit mantissa
+  const uint32_t exp = static_cast<uint32_t>(127);
+  const uint32_t val = (exp << 23) | man;
+
+  // Assumes that endian-ness is same for float and uint32_t.
+  float result;
+  std::memcpy(&result, &val, sizeof(val));
+  return result - 1.0f;
+}
+
+// Fills data[0, length) with values in [0, 1) from a PhiloxRandom seeded with (seed, seed2). Each generator
+// call yields four values; when length is not a multiple of four, only part of the last group is used.
+void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) {
+  constexpr size_t kSampleSize = 4;
+  PhiloxRandom philoxRandom(seed, seed2);
+  for (size_t offset = 0; offset < length; offset += kSampleSize) {
+    const auto randNum = philoxRandom();
+    const size_t remaining = length - offset;
+    const size_t count = remaining < kSampleSize ? remaining : kSampleSize;
+    for (size_t i = 0; i < count; ++i) {
+      data[offset + i] = uint32ToFloat(randNum[i]);
+    }
+  }
+}
+
+int UniformRealCPUKernel::Init() { return RET_OK; }
+
+int UniformRealCPUKernel::ReSize() { return RET_OK; }
+
+// Writes uniform floats into output tensor 0. Negative seeds are rejected; two positive seeds select the Philox
+// generator; otherwise (at least one seed is zero) std::rand is used.
+int UniformRealCPUKernel::Run() {
+  auto output0 = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
+  MS_ASSERT(output0);
+  if (seed_ < 0 || seed2_ < 0) {
+    MS_LOG(ERROR) << "seed_:" << seed_ << " and seed2_:" << seed2_ << " must be greater than or equal to 0!";
+    return RET_ERROR;
+  }
+  const auto element_num = out_tensors_.at(0)->ElementsNum();
+  if (seed_ > 0 && seed2_ > 0) {
+    GetPhiloxRandomFloat(output0, element_num, seed_, seed2_);
+    return RET_OK;
+  }
+  // At least one seed is zero on this path, so the bitwise OR yields the other seed (0 when both are zero).
+  // A logical OR would collapse every non-zero seed to 1.
+  std::srand(static_cast<unsigned int>(seed_ | seed2_));
+  for (int i = 0; i < element_num; ++i) {
+    output0[i] = static_cast<float>(std::rand()) / static_cast<float>(RAND_MAX);
+  }
+  return RET_OK;
+}
+
+REG_KERNEL(kCPU, kNumberTypeInt32, PrimitiveType_UniformReal, LiteKernelCreator<UniformRealCPUKernel>)
+}  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.h
new file mode 100644
index 0000000000..bc86f4b436
--- /dev/null
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.h
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_UNIFORM_REAL_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_UNIFORM_REAL_H_
+
+#include <vector>
+#include "src/lite_kernel.h"
+#include "nnacl/random_parameter.h"
+
+namespace mindspore::kernel {
+// CPU kernel for UniformReal; copies both seeds out of the RandomParam behind `parameter` at construction.
+class UniformRealCPUKernel : public LiteKernel {
+ public:
+  UniformRealCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
+                       const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
+      : LiteKernel(parameter, inputs, outputs, ctx),
+        seed_(reinterpret_cast<RandomParam *>(parameter)->seed_),
+        seed2_(reinterpret_cast<RandomParam *>(parameter)->seed2_) {}
+  ~UniformRealCPUKernel() = default;
+  int Init() override;
+  int ReSize() override;
+  int Run() override;
+
+ private:
+  int seed_ = 0;
+  int seed2_ = 0;
+};
+}  // namespace mindspore::kernel
+
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_UNIFORM_REAL_H_
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc
new file mode 100644
index 0000000000..9daebf28b4
--- /dev/null
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/uniform_real_fp32_test.cc
@@ -0,0 +1,67 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include "common/common_test.h"
+#include "mindspore/lite/nnacl/random_parameter.h"
+#include "mindspore/lite/src/kernel_registry.h"
+
+namespace mindspore {
+class TestUniformRealFp32 : public mindspore::CommonTest {
+ public:
+  TestUniformRealFp32() {}
+};
+
+TEST_F(TestUniformRealFp32, UniformReal) {
+  // Output tensor backed by a stack buffer owned by this test; the kernel is created without inputs.
+  lite::Tensor out_tensor0(kNumberTypeFloat32, {10});
+  float output_data0[10] = {0};
+  out_tensor0.set_data(output_data0);
+  std::vector<lite::Tensor *> inputs = {};
+  std::vector<lite::Tensor *> outputs = {&out_tensor0};
+
+  RandomParam parameter;
+  parameter.op_parameter_.type_ = schema::PrimitiveType_UniformReal;
+  parameter.seed_ = 42;
+  parameter.seed2_ = 959;
+  kernel::KernelKey desc = {kernel::KERNEL_ARCH::kCPU, kNumberTypeInt32, schema::PrimitiveType_UniformReal};
+
+  auto creator = lite::KernelRegistry::GetInstance()->GetCreator(desc);
+  // Abort on lookup failure: the creator is invoked unconditionally below.
+  ASSERT_NE(creator, nullptr);
+
+  auto ctx = std::make_shared<lite::InnerContext>();
+  ASSERT_EQ(lite::RET_OK, ctx->Init());
+  auto kernel = creator(inputs, outputs, reinterpret_cast<OpParameter *>(&parameter), ctx.get(), desc);
+  // Abort on creation failure: the kernel is dereferenced unconditionally below.
+  ASSERT_NE(kernel, nullptr);
+
+  auto ret = kernel->Run();
+  EXPECT_EQ(0, ret);
+  // Reference values for seed 42 / seed2 959; kept as double so the comparison matches the literal form.
+  const double expect[10] = {0.138693, 0.511552, 0.27194,  0.336527, 0.896684,
+                             0.476402, 0.155924, 0.817732, 0.619868, 0.274392};
+  for (int i = 0; i < 10; ++i) {
+    EXPECT_NEAR(expect[i], output_data0[i], 0.000001);
+  }
+
+  for (int i = 0; i < 10; ++i) {
+    std::cout << output_data0[i] << " ";
+  }
+  // NOTE(review): presumably detaches the stack buffer so the tensor never frees it - confirm in lite::Tensor.
+  out_tensor0.set_data(nullptr);
+}
+}  // namespace mindspore