parent
2d50a43be9
commit
a834a6308e
@ -0,0 +1,104 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <thrust/extrema.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <thrust/execution_policy.h>
|
||||
#include <thrust/reduce.h>
|
||||
#include <thrust/pair.h>
|
||||
#include "minmax_update_impl.cuh"
|
||||
#include "device/gpu/cuda_common.h"
|
||||
|
||||
/**
 * Blends a freshly measured per-layer range into the running range with an exponential moving average.
 *
 * Only element 0 of every pointer argument is touched and no thread index is used, so the kernel is
 * meant to be launched with a single thread (the host wrapper in this file uses <<<1, 1>>>).
 *
 * @param input_min   running minimum (element 0 is read)
 * @param input_max   running maximum (element 0 is read)
 * @param output_min  receives the updated minimum (element 0 is written)
 * @param output_max  receives the updated maximum (element 0 is written)
 * @param min         minimum measured on the current input
 * @param max         maximum measured on the current input
 * @param decay       weight of the measured value; (1 - decay) is the weight of the running value
 * @param symmetric   non-zero to widen the range so that output_min == -output_max
 */
__global__ void UpdateInputMinMaxPerLayerWithEMA(const float *input_min, const float *input_max, float *output_min,
                                                 float *output_max, const float min, const float max, const float decay,
                                                 const float symmetric) {
  // Read both running values before any store so the result is the same whether or not the
  // output buffers alias the input buffers.
  float range_min = decay * min + (1 - decay) * input_min[0];
  float range_max = decay * max + (1 - decay) * input_max[0];

  // Clamp the *blended* values so the range always contains zero. The previous code clamped
  // input_min / input_max here, which threw the moving average away.
  range_min = range_min > 0 ? 0 : range_min;
  range_max = range_max < 0 ? 0 : range_max;

  if (symmetric) {
    // range_min <= 0 after the clamp, so -range_min is its magnitude.
    const float bound = fabsf(range_min) < range_max ? range_max : -range_min;
    range_max = bound;
    range_min = -bound;
  }

  output_min[0] = range_min;
  output_max[0] = range_max;
}
|
||||
|
||||
/**
 * Stores a measured per-layer range into output_min[0] / output_max[0] without any averaging.
 *
 * The minimum is capped at zero from above and the maximum at zero from below, so the stored range
 * always contains zero. When `symmetric` is non-zero the range is then widened to be centred on zero.
 * No thread index is used; the host wrapper in this file launches it with <<<1, 1>>>.
 */
__global__ void UpdateInputMinMaxPerLayer(float *output_min, float *output_max, const float min, const float max,
                                          const float symmetric) {
  if (min > 0) {
    output_min[0] = 0;
  } else {
    output_min[0] = min;
  }

  if (max < 0) {
    output_max[0] = 0;
  } else {
    output_max[0] = max;
  }

  if (!symmetric) {
    return;
  }

  // Grow the maximum up to the magnitude of the minimum when the minimum is the larger side...
  if (!(abs(output_min[0]) < output_max[0])) {
    output_max[0] = -output_min[0];
  }
  // ...and otherwise mirror the (already larger) maximum onto the minimum.
  if (abs(output_min[0]) < output_max[0]) {
    output_min[0] = -output_max[0];
  }
}
|
||||
|
||||
/**
 * Computes the value range of every channel of `input` and stores it in output_min / output_max.
 *
 * Channel i is the contiguous slice input[i * per_channel_nums, (i + 1) * per_channel_nums). Channels
 * are distributed over threads with a grid-stride loop, so any 1-D launch configuration is valid.
 *
 * @param input             input values, `channels` slices of `per_channel_nums` elements each
 * @param input_min         running per-channel minimum (read only when `ema` is true)
 * @param input_max         running per-channel maximum (read only when `ema` is true)
 * @param output_min        receives the updated per-channel minimum
 * @param output_max        receives the updated per-channel maximum
 * @param channels          number of channels
 * @param per_channel_nums  number of elements per channel; must be > 0 because the extrema are dereferenced
 * @param ema               blend the measured range into input_min / input_max instead of replacing it
 * @param ema_decay         weight of the measured value in the blend
 * @param symmetric         widen each range so that output_min[i] == -output_max[i]
 */
__global__ void UpdateInputMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min,
                                            float *output_max, int channels, int per_channel_nums, bool ema,
                                            float ema_decay, bool symmetric) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channels; i += blockDim.x * gridDim.x) {
    // Widen before multiplying so the element offset cannot overflow int.
    float *channel_begin = input + static_cast<size_t>(i) * per_channel_nums;
    float *channel_end = channel_begin + per_channel_nums;
    thrust::pair<float *, float *> extrema = thrust::minmax_element(thrust::device, channel_begin, channel_end);

    float range_min = *extrema.first;
    float range_max = *extrema.second;
    if (ema) {
      range_min = ema_decay * range_min + (1 - ema_decay) * input_min[i];
      range_max = ema_decay * range_max + (1 - ema_decay) * input_max[i];
    }

    // Clamp the computed range so it always contains zero. The previous code clamped
    // input_min[i] / input_max[i] here, which discarded the values computed above.
    range_min = range_min > 0 ? 0 : range_min;
    range_max = range_max < 0 ? 0 : range_max;

    if (symmetric) {
      // range_min <= 0 after the clamp, so -range_min is its magnitude.
      const float bound = fabsf(range_min) < range_max ? range_max : -range_min;
      range_max = bound;
      range_min = -bound;
    }

    output_min[i] = range_min;
    output_max[i] = range_max;
  }
}
|
||||
|
||||
/**
 * Host entry point for the per-channel min/max update.
 *
 * Splits `input` into `channel_num` equal slices of total_num / channel_num elements and launches
 * UpdateInputMinMaxPerChannel on `cuda_stream`. The launch is asynchronous; this function does not
 * synchronize the stream.
 *
 * @param input        device pointer to the input values
 * @param input_min    device pointer to the running per-channel minimum
 * @param input_max    device pointer to the running per-channel maximum
 * @param output_min   device pointer receiving the updated per-channel minimum
 * @param output_max   device pointer receiving the updated per-channel maximum
 * @param total_num    total number of elements in `input`
 * @param channel_num  number of channels
 * @param ema_decay    weight of the measured value when `ema` is true
 * @param ema          blend with the running range instead of replacing it
 * @param symmetric    make each resulting range symmetric around zero
 * @param cuda_stream  stream the kernel is enqueued on
 */
void CalMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min, float *output_max,
                         const int total_num, const int channel_num, const float ema_decay, const bool ema,
                         const bool symmetric, cudaStream_t cuda_stream) {
  // Without at least one element per channel there is nothing to reduce. Bailing out here also avoids
  // dividing by zero below and stops the kernel from dereferencing the extrema of an empty slice.
  if (channel_num <= 0 || total_num < channel_num) {
    return;
  }
  const int per_channel_num = total_num / channel_num;
  UpdateInputMinMaxPerChannel<<<GET_BLOCKS(channel_num), GET_THREADS, 0, cuda_stream>>>(
    input, input_min, input_max, output_min, output_max, channel_num, per_channel_num, ema, ema_decay, symmetric);
}
|
||||
|
||||
/**
 * Host entry point for the per-layer min/max update.
 *
 * Finds the smallest and largest element of `input` with thrust::minmax_element on `cuda_stream`,
 * copies both values to the host, and then launches a single-thread kernel that stores the
 * (optionally EMA-blended, optionally symmetric) range in output_min[0] / output_max[0].
 *
 * @param input        device pointer to the input values
 * @param input_min    device pointer to the running minimum (used only when `ema` is true)
 * @param input_max    device pointer to the running maximum (used only when `ema` is true)
 * @param output_min   device pointer receiving the updated minimum
 * @param output_max   device pointer receiving the updated maximum
 * @param total_num    number of elements in `input`
 * @param ema_decay    weight of the measured value when `ema` is true
 * @param ema          blend with the running range instead of replacing it
 * @param symmetric    make the resulting range symmetric around zero
 * @param cuda_stream  stream the reduction and the update kernel are enqueued on
 */
void CalMinMaxPerLayer(float *input, float *input_min, float *input_max, float *output_min, float *output_max,
                       const int total_num, const float ema_decay, const bool ema, const bool symmetric,
                       cudaStream_t cuda_stream) {
  // minmax_element on an empty range returns the end iterator, which must not be dereferenced.
  if (total_num <= 0) {
    return;
  }

  auto policy = thrust::cuda::par.on(cuda_stream);
  thrust::device_ptr<float> first = thrust::device_pointer_cast(input);
  thrust::pair<thrust::device_ptr<float>, thrust::device_ptr<float>> extrema =
    thrust::minmax_element(policy, first, first + total_num);

  // Indexing a thrust::device_ptr from host code copies the element back from the device.
  const float minel = extrema.first[0];
  const float maxel = extrema.second[0];

  if (ema) {
    UpdateInputMinMaxPerLayerWithEMA<<<1, 1, 0, cuda_stream>>>(input_min, input_max, output_min, output_max, minel,
                                                               maxel, ema_decay, symmetric);
  } else {
    UpdateInputMinMaxPerLayer<<<1, 1, 0, cuda_stream>>>(output_min, output_max, minel, maxel, symmetric);
  }
}
|
@ -0,0 +1,30 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_
|
||||
|
||||
#include "device/gpu/cuda_common.h"
|
||||
|
||||
// Enqueues the per-channel min/max update for `input` on `cuda_stream`.
//   input                    device buffer holding `total_num` floats, split evenly into `channel_num` channels
//   input_min / input_max    device buffers with one running value per channel
//   output_min / output_max  device buffers receiving one updated value per channel
//   ema_decay / ema          moving-average weight and the switch that enables it
//   symmetric                request ranges that are symmetric around zero
void CalMinMaxPerChannel(float *input, float *input_min, float *input_max, float *output_min,
                         float *output_max, const int total_num, const int channel_num,
                         const float ema_decay, const bool ema, const bool symmetric,
                         cudaStream_t cuda_stream);
|
||||
|
||||
// Enqueues the per-layer min/max update for `input` on `cuda_stream`.
//   input                    device buffer holding `total_num` floats
//   input_min / input_max    device buffers holding the single running minimum / maximum
//   output_min / output_max  device buffers receiving the single updated minimum / maximum
//   ema_decay / ema          moving-average weight and the switch that enables it
//   symmetric                request a range that is symmetric around zero
// The element-count parameter is named `total_num` to match the definition and CalMinMaxPerChannel.
void CalMinMaxPerLayer(float *input, float *input_min, float *input_max, float *output_min, float *output_max,
                       const int total_num, const float ema_decay, const bool ema, const bool symmetric,
                       cudaStream_t cuda_stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_CUDA_MIN_MAX_UPDATE_IMPL_H_
|
@ -0,0 +1,119 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/gpu/quant/minmax_update_perchannel_gpu_kernel.h"
|
||||
#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh"
|
||||
#include <thrust/extrema.h>
|
||||
#include <thrust/pair.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Builds a kernel object with neutral placeholder settings; Init() overwrites them from the node.
// quant_num_ starts at 1 because Init() uses it as the running product of the input dimensions.
MinMaxUpdatePerChannelGpuKernel::MinMaxUpdatePerChannelGpuKernel()
    : input_size_(0),
      num_bits_(0),
      quant_min_(0.0f),
      quant_max_(0.0f),
      quant_num_(1),
      ema_(false),
      ema_decay_(0.0f),
      num_channels_(0),
      narrow_range_(false),
      symmetric_(false) {}
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerChannelGpuKernel::GetInputSizeList() const { return input_size_list_; }
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerChannelGpuKernel::GetOutputSizeList() const { return output_size_list_; }
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerChannelGpuKernel::GetWorkspaceSizeList() const {
|
||||
return workspace_size_list_;
|
||||
}
|
||||
|
||||
bool MinMaxUpdatePerChannelGpuKernel::Init(const CNodePtr &kernel_node) {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuant GpuKernel OP needs 3 output.";
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output.";
|
||||
}
|
||||
|
||||
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
|
||||
ema_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema"));
|
||||
ema_decay_ = GetValue<float>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay"));
|
||||
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
|
||||
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));
|
||||
|
||||
if (num_bits_ <= 2 || num_bits_ >= 16) {
|
||||
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
|
||||
}
|
||||
|
||||
// quant min and max
|
||||
quant_min_ = 0;
|
||||
quant_max_ = (1 << num_bits_) - 1;
|
||||
if (narrow_range_) {
|
||||
quant_min_++;
|
||||
}
|
||||
|
||||
// init size
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
num_channels_ = SizeToInt(input_shape[0]);
|
||||
for (size_t i = 0; i < input_shape.size(); ++i) {
|
||||
quant_num_ *= SizeToInt(input_shape[i]);
|
||||
}
|
||||
input_size_ = sizeof(float);
|
||||
for (size_t i = 0; i < input_shape.size(); i++) {
|
||||
input_size_ *= input_shape[i];
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Appends the byte sizes of the three inputs (input, min, max) and the two outputs (min, max).
// The min/max buffers hold one float per channel.
void MinMaxUpdatePerChannelGpuKernel::InitSizeLists() {
  const size_t per_channel_bytes = sizeof(float) * num_channels_;
  // input, min, max
  input_size_list_.insert(input_size_list_.end(), {input_size_, per_channel_bytes, per_channel_bytes});
  // output min, output max
  output_size_list_.insert(output_size_list_.end(), {per_channel_bytes, per_channel_bytes});
}
|
||||
|
||||
bool MinMaxUpdatePerChannelGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
float *output_min = GetDeviceAddress<float>(outputs, 0);
|
||||
float *output_max = GetDeviceAddress<float>(outputs, 1);
|
||||
float *input = GetDeviceAddress<float>(inputs, 0);
|
||||
float *input_min = GetDeviceAddress<float>(inputs, 1);
|
||||
float *input_max = GetDeviceAddress<float>(inputs, 2);
|
||||
|
||||
if (input == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "MinMaxUpdatePerChannelGpuKernel input x is null.";
|
||||
}
|
||||
if (input_min == nullptr || input_max == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "MinMaxUpdatePerChannelGpuKernel input min or input max is null.";
|
||||
}
|
||||
|
||||
// calculate the input min and max according by the parameter ema and ema_decay.
|
||||
CalMinMaxPerChannel(input, input_min, input_max, output_min, output_max, input_size_ / sizeof(float), num_channels_,
|
||||
ema_decay_, ema_, symmetric_, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
return true;
|
||||
}
|
||||
|
||||
// NOTE(review): presumably registers MinMaxUpdatePerChannelGpuKernel for the op named
// MinMaxUpdatePerChannel; the macro is defined outside this file, so confirm against its definition.
MS_REG_GPU_KERNEL(MinMaxUpdatePerChannel, MinMaxUpdatePerChannelGpuKernel)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
@ -0,0 +1,60 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include "kernel/gpu/gpu_kernel.h"
|
||||
#include "kernel/gpu/gpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class MinMaxUpdatePerChannelGpuKernel : public GpuKernel {
|
||||
public:
|
||||
MinMaxUpdatePerChannelGpuKernel();
|
||||
~MinMaxUpdatePerChannelGpuKernel() = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override;
|
||||
const std::vector<size_t> &GetOutputSizeList() const override;
|
||||
const std::vector<size_t> &GetWorkspaceSizeList() const override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
bool Init(const CNodePtr &kernel) override;
|
||||
|
||||
protected:
|
||||
void InitSizeLists() override;
|
||||
|
||||
private:
|
||||
size_t input_size_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
|
||||
int num_bits_;
|
||||
float quant_min_;
|
||||
float quant_max_;
|
||||
int quant_num_;
|
||||
bool ema_;
|
||||
float ema_decay_;
|
||||
int num_channels_;
|
||||
bool narrow_range_;
|
||||
bool symmetric_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERCHANNEL_GPUKERNEL_H_
|
@ -0,0 +1,115 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "kernel/gpu/quant/minmax_update_perlayer_gpu_kernel.h"
|
||||
#include "kernel/gpu/cuda_impl/minmax_update_impl.cuh"
|
||||
#include <thrust/extrema.h>
|
||||
#include <thrust/pair.h>
|
||||
#include <thrust/device_vector.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Builds a kernel object with neutral placeholder settings; Init() overwrites them from the node.
// quant_num_ starts at 1 because Init() uses it as the running product of the input dimensions.
MinMaxUpdatePerLayerGpuKernel::MinMaxUpdatePerLayerGpuKernel()
    : input_size_(0),
      num_bits_(0),
      quant_min_(0.0f),
      quant_max_(0.0f),
      quant_num_(1),
      ema_(false),
      ema_decay_(0.0f),
      narrow_range_(false),
      symmetric_(false) {}
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerLayerGpuKernel::GetInputSizeList() const { return input_size_list_; }
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerLayerGpuKernel::GetOutputSizeList() const { return output_size_list_; }
|
||||
|
||||
const std::vector<size_t> &MinMaxUpdatePerLayerGpuKernel::GetWorkspaceSizeList() const { return workspace_size_list_; }
|
||||
|
||||
bool MinMaxUpdatePerLayerGpuKernel::Init(const CNodePtr &kernel_node) {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but FakeQuant GpuKernel OP needs 3 output.";
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but FakeQuant GpuKernel OP needs 1 output.";
|
||||
}
|
||||
|
||||
num_bits_ = GetValue<int>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("num_bits"));
|
||||
ema_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema"));
|
||||
ema_decay_ = GetValue<float>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("ema_decay"));
|
||||
symmetric_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("symmetric"));
|
||||
narrow_range_ = GetValue<bool>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("narrow_range"));
|
||||
|
||||
if (num_bits_ <= 2 || num_bits_ >= 16) {
|
||||
MS_LOG(EXCEPTION) << "Attr \'num_bits\' " << num_bits_ << " is out of range, expected between 2 and 16.";
|
||||
}
|
||||
|
||||
// quant min and max
|
||||
quant_min_ = 0;
|
||||
quant_max_ = (1 << num_bits_) - 1;
|
||||
if (narrow_range_) {
|
||||
quant_min_++;
|
||||
}
|
||||
|
||||
// init size
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
for (size_t i = 0; i < input_shape.size(); ++i) {
|
||||
quant_num_ *= SizeToInt(input_shape[i]);
|
||||
}
|
||||
input_size_ = sizeof(float);
|
||||
for (size_t i = 0; i < input_shape.size(); i++) {
|
||||
input_size_ *= input_shape[i];
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Appends the byte sizes of the three inputs (input, min, max) and the two outputs (min, max).
// The min/max buffers each hold a single float.
void MinMaxUpdatePerLayerGpuKernel::InitSizeLists() {
  const size_t scalar_bytes = sizeof(float);
  // input, input min, input max
  input_size_list_.insert(input_size_list_.end(), {input_size_, scalar_bytes, scalar_bytes});
  // output min, output max
  output_size_list_.insert(output_size_list_.end(), {scalar_bytes, scalar_bytes});
}
|
||||
|
||||
bool MinMaxUpdatePerLayerGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||
float *output_min = GetDeviceAddress<float>(outputs, 0);
|
||||
float *output_max = GetDeviceAddress<float>(outputs, 1);
|
||||
float *input = GetDeviceAddress<float>(inputs, 0);
|
||||
float *input_min = GetDeviceAddress<float>(inputs, 1);
|
||||
float *input_max = GetDeviceAddress<float>(inputs, 2);
|
||||
|
||||
if (input == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "MinMaxUpdatePerLayerGpuKernel input x is null.";
|
||||
}
|
||||
if (input_min == nullptr || input_max == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "MinMaxUpdatePerLayerGpuKernel input min or input max is null.";
|
||||
}
|
||||
|
||||
CalMinMaxPerLayer(input, input_min, input_max, output_min, output_max, quant_num_, ema_decay_, ema_, symmetric_,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// NOTE(review): presumably registers MinMaxUpdatePerLayerGpuKernel for the op named
// MinMaxUpdatePerLayer; the macro is defined outside this file, so confirm against its definition.
MS_REG_GPU_KERNEL(MinMaxUpdatePerLayer, MinMaxUpdatePerLayerGpuKernel)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
@ -0,0 +1,59 @@
|
||||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_
|
||||
#define MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include "kernel/gpu/gpu_kernel.h"
|
||||
#include "kernel/gpu/gpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class MinMaxUpdatePerLayerGpuKernel : public GpuKernel {
|
||||
public:
|
||||
MinMaxUpdatePerLayerGpuKernel();
|
||||
~MinMaxUpdatePerLayerGpuKernel() = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override;
|
||||
const std::vector<size_t> &GetOutputSizeList() const override;
|
||||
const std::vector<size_t> &GetWorkspaceSizeList() const override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
bool Init(const CNodePtr &kernel) override;
|
||||
|
||||
protected:
|
||||
void InitSizeLists() override;
|
||||
|
||||
private:
|
||||
size_t input_size_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
|
||||
int num_bits_;
|
||||
float quant_min_;
|
||||
float quant_max_;
|
||||
int quant_num_;
|
||||
bool ema_;
|
||||
float ema_decay_;
|
||||
bool narrow_range_;
|
||||
bool symmetric_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_KERNEL_GPU_MINMAX_UPDATE_PERLAYER_GPUKERNEL_H_
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue