Review GPU quantization code

pull/2097/head
chenzomi 5 years ago
parent 703c1b26dd
commit 23e04f3963

@ -39,12 +39,10 @@ class BatchNormFold2GpuKernel : public GpuKernel {
~BatchNormFold2GpuKernel() override { DestroyResource(); }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
@ -111,10 +109,7 @@ class BatchNormFold2GpuKernel : public GpuKernel {
input_size_list_.push_back(weight_size); // running_std
input_size_list_.push_back(weight_size); // running_mean
input_size_list_.push_back(sizeof(int32_t)); // global_step
output_size_list_.push_back(input_size);
workspace_size_list_.push_back(sizeof(int32_t));
}
private:

@ -39,9 +39,7 @@ class BatchNormFold2GradGpuKernel : public GpuKernel {
~BatchNormFold2GradGpuKernel() override { DestroyResource(); }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,

@ -47,9 +47,7 @@ class BatchNormFoldGpuKernel : public GpuKernel {
~BatchNormFoldGpuKernel() override { DestroyResource(); }
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,

@ -46,9 +46,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
(void)workspace;
// 'd_batch_mean', 'd_batch_std', 'x', 'batch_mean', 'batch_std', 'current_step'
T *d_batch_mean = GetDeviceAddress<T>(inputs, 0);
T *d_batch_std = GetDeviceAddress<T>(inputs, 1);
@ -139,11 +138,8 @@ class BatchNormFoldGradGpuKernel : public GpuKernel {
input_size_list_.push_back(channel_size_);
input_size_list_.push_back(channel_size_);
input_size_list_.push_back(sizeof(int));
// 'dx'
output_size_list_.push_back(input_size_);
workspace_size_list_.push_back(workspace_size_);
}
private:

@ -33,7 +33,8 @@ class CorrectionMulGpuKernel : public GpuKernel {
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto *weight = GetDeviceAddress<T>(inputs, 0);
auto *gamma = GetDeviceAddress<T>(inputs, 1);
@ -74,10 +75,9 @@ class CorrectionMulGpuKernel : public GpuKernel {
input_size_list_.push_back(input_size); // weight
input_size_list_.push_back(weight_size); // gamma
input_size_list_.push_back(weight_size); // running_std
size_t workspace_size = 0;
output_size_list_.push_back(input_size);
workspace_size_list_.push_back(workspace_size);
}
void InitResource() override {}
private:

@ -101,10 +101,9 @@ void FakeQuantGradGpuKernel::InitSizeLists() {
input_size_list_.push_back(min_size_); // min
input_size_list_.push_back(max_size_); // max
output_size_list_.push_back(output_size_);
workspace_size_list_.push_back(workspace_size_);
}
bool FakeQuantGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
bool FakeQuantGradGpuKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
float *output = GetDeviceAddress<float>(outputs, 0);
float *gradient = GetDeviceAddress<float>(inputs, 0);

Loading…
Cancel
Save