Update elementwise double grad to save gpu memory (#19509)
* update elementwise double grad to save gpu memory, test=develop
* update elementwise_mul/div_grad_grad to save memory, test=develop
* remove eval function in eigen statement to save memory, test=develop
* add unittest for elementwise_div_grad_grad without dout, test=develop
* add unittest for elementwise_add_grad_grad without ddx, test=develop
* add float16 cuda kernel for elementwise double grad op, test=develop
parent db26de8389
commit 982e61f5ff
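The two memory-saving techniques named in the message can be sketched briefly. First, dropping `.eval()` from Eigen expressions: `.eval()` materializes the intermediate result into a temporary buffer the size of the output, while the lazy expression is fused into a single kernel that writes straight into the destination. A minimal sketch, assuming Paddle's EigenVector wrappers; the tensor names below are illustrative, not taken from this commit:

// Hedged sketch: ddx_t, ddy_t, ddout_t are assumed LoDTensor inputs/outputs.
auto ddx = framework::EigenVector<T>::Flatten(ddx_t);
auto ddy = framework::EigenVector<T>::Flatten(ddy_t);
auto ddout = framework::EigenVector<T>::Flatten(*ddout_t);
// Before: .eval() allocates a numel * sizeof(T) temporary for the sum.
// ddout.device(place) = (ddx + ddy).eval();
// After: the unevaluated expression is fused and written directly into ddout.
ddout.device(place) = ddx + ddy;

Second, the float16 CUDA kernels for the double-grad ops would be registered in the usual way, roughly as follows (the exact kernel class names are assumed, not shown in this diff):

REGISTER_OP_CUDA_KERNEL(
    elementwise_add_grad_grad,
    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, float>,
    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext, double>,
    ops::ElementwiseAddDoubleGradKernel<plat::CUDADeviceContext,
                                        plat::float16>);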
@@ -0,0 +1,83 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cstdlib>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"

USE_OP(elementwise_add);

namespace paddle {
namespace operators {

// Tests double grad of elementwise_add when the DDX input is not provided;
// named after the missing input, matching the test cases below.
template <typename T>
class TestElementwiseAddGradGradWithoutDDX
    : public TestElementwiseOpGradGrad<T> {
 public:
  TestElementwiseAddGradGradWithoutDDX(const platform::Place &place,
                                       const framework::DDim &dims)
      : TestElementwiseOpGradGrad<T>("elementwise_add_grad_grad", place, dims,
                                     {"Y", "DOut", "DDY"}, {"DDOut"}) {}

  using TestElementwiseOpGradGrad<T>::feed_datas_;
  using TestElementwiseOpGradGrad<T>::expected_outs_;
  using TestElementwiseOpGradGrad<T>::dims_;
  void ComputeExpectedOuts() override {
    size_t numel = static_cast<size_t>(framework::product(dims_));
    std::vector<T> ddout(numel);
    for (size_t i = 0; i < numel; ++i) {
      // ddOut = ddX + ddY = ddY when ddX is empty
      ddout[i] = feed_datas_["DDY"][i];
    }
    expected_outs_["DDOut"] = ddout;
  }

  std::unique_ptr<framework::OperatorBase> CreateTestOp() override {
    auto op = framework::OpRegistry::CreateOp(
        this->op_type_, {{"Y", {"Y"}}, {"DOut", {"DOut"}}, {"DDY", {"DDY"}}},
        {{"DDOut", {"DDOut"}}}, {{"use_mkldnn", false}, {"axis", 0}});
    return op;
  }
};

TEST(test_elementwise_add_grad_grad_without_ddx, cpu_place) {
  framework::DDim dims({32, 64});
  platform::CPUPlace p;
  TestElementwiseAddGradGradWithoutDDX<float> test(p, dims);
  ASSERT_TRUE(test.Check());
}

#ifdef PADDLE_WITH_CUDA
TEST(test_elementwise_add_grad_grad_without_ddx, gpu_place) {
  framework::DDim dims({32, 64});
  platform::CUDAPlace p(0);
  TestElementwiseAddGradGradWithoutDDX<float> test(p, dims);
  ASSERT_TRUE(test.Check());
}
#endif

} // namespace operators
} // namespace paddle
@@ -0,0 +1,97 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <cstdlib>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"

USE_OP(elementwise_div);

namespace paddle {
namespace operators {

template <typename T>
class TestElementwiseDivGradGradWithoutDout
    : public TestElementwiseOpGradGrad<T> {
 public:
  TestElementwiseDivGradGradWithoutDout(const platform::Place &place,
                                        const framework::DDim &dims)
      : TestElementwiseOpGradGrad<T>("elementwise_div_grad_grad", place, dims,
                                     {"Y", "Out", "DDX", "DDY", "DX"},
                                     {"Y@GRAD", "DDOut"}) {}

  using TestElementwiseOpGradGrad<T>::feed_datas_;
  using TestElementwiseOpGradGrad<T>::expected_outs_;
  using TestElementwiseOpGradGrad<T>::dims_;
  void ComputeExpectedOuts() override {
    size_t numel = static_cast<size_t>(framework::product(dims_));
    std::vector<T> dy(numel);
    std::vector<T> ddout(numel);
    for (size_t i = 0; i < numel; ++i) {
      // dY (Y@GRAD) = Out * dX * ddY / Y - dX * ddX / Y
      dy[i] = (feed_datas_["DX"][i] / feed_datas_["Y"][i]) *
              (feed_datas_["Out"][i] * feed_datas_["DDY"][i] -
               feed_datas_["DDX"][i]);
      // ddOut = ddX / Y - Out * ddY / Y = (ddX - Out * ddY) / Y
      ddout[i] = (feed_datas_["DDX"][i] -
                  feed_datas_["Out"][i] * feed_datas_["DDY"][i]) /
                 (feed_datas_["Y"][i]);
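      // Equivalently, dY = -dX * ddOut: substituting the ddOut expression
      // above into the dY formula gives
      // dy[i] == -feed_datas_["DX"][i] * ddout[i].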
    }
    expected_outs_["Y@GRAD"] = dy;
    expected_outs_["DDOut"] = ddout;
  }

  std::unique_ptr<framework::OperatorBase> CreateTestOp() override {
    auto op = framework::OpRegistry::CreateOp(
        this->op_type_, {{"Y", {"Y"}},
                         {"Out", {"Out"}},
                         {"DDX", {"DDX"}},
                         {"DDY", {"DDY"}},
                         {"DX", {"DX"}}},
        {{"Y@GRAD", {"Y@GRAD"}}, {"DDOut", {"DDOut"}}},
        {{"use_mkldnn", false}, {"axis", 0}});
    return op;
  }
};

TEST(test_elementwise_div_grad_grad_without_dout, cpu_place) {
  framework::DDim dims({32, 64});
  platform::CPUPlace p;
  TestElementwiseDivGradGradWithoutDout<float> test(p, dims);
  ASSERT_TRUE(test.Check());
}

#ifdef PADDLE_WITH_CUDA
TEST(test_elementwise_div_grad_grad_without_dout, gpu_place) {
  framework::DDim dims({32, 64});
  platform::CUDAPlace p(0);
  TestElementwiseDivGradGradWithoutDout<float> test(p, dims);
  ASSERT_TRUE(test.Check());
}
#endif

} // namespace operators
} // namespace paddle
@@ -0,0 +1,151 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <cstdlib>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"

namespace paddle {
namespace operators {

// Currently, this test class only supports inputs and outputs that share the
// same dims.
template <typename T>
class TestElementwiseOpGradGrad {
 public:
  TestElementwiseOpGradGrad(const std::string &op_type,
                            const platform::Place &place,
                            const framework::DDim &dims,
                            const std::vector<std::string> &inputs,
                            const std::vector<std::string> &outputs)
      : op_type_(op_type),
        place_(place),
        dims_(dims),
        inputs_(inputs),
        outputs_(outputs) {}

  void InitVarInScope(std::string var_name) {
    in_out_tensors_[var_name] =
        scope_.Var(var_name)->template GetMutable<framework::LoDTensor>();
    in_out_tensors_[var_name]->Resize(dims_);
    in_out_tensors_[var_name]->template mutable_data<T>(place_);
  }

  void InitFeedData(std::string var_name, size_t size) {
    // generate random data
    std::uniform_real_distribution<T> dist(static_cast<T>(10.0),
                                           static_cast<T>(20.0));
    std::mt19937 engine;
    std::vector<T> data(size);
    for (size_t i = 0; i < size; ++i) {
      data[i] = dist(engine);
    }
    feed_datas_[var_name] = data;
  }

  void Setup() {
    size_t numel = static_cast<size_t>(framework::product(dims_));
    // init vars in scope and feed inputs
    for (auto in_name : inputs_) {
      InitVarInScope(in_name);
      InitFeedData(in_name, numel);
    }
    for (auto out_name : outputs_) {
      InitVarInScope(out_name);
    }

    // feeding: copy data into the input tensors; output tensors need no init
    auto bytes = sizeof(T) * numel;
    for (auto &in_name : inputs_) {
      auto dst = in_out_tensors_[in_name]->template data<T>();
      auto src = feed_datas_[in_name].data();
      auto src_place = platform::CPUPlace();
      if (platform::is_cpu_place(place_)) {
        auto dst_place = boost::get<platform::CPUPlace>(place_);
        memory::Copy(dst_place, dst, src_place, src, bytes);
      } else if (platform::is_gpu_place(place_)) {
#ifdef PADDLE_WITH_CUDA
        auto dst_place = boost::get<platform::CUDAPlace>(place_);
        memory::Copy(dst_place, dst, src_place, src, bytes, nullptr);
#else
        PADDLE_THROW("Not compiled with CUDA");
#endif
      }
    }

    // calculate expected outputs
    ComputeExpectedOuts();
  }

  bool Check() {
    Setup();
    auto op = CreateTestOp();
    op->Run(scope_, place_);
    platform::DeviceContextPool::Instance().Get(place_)->Wait();
    framework::LoDTensor cpu_out;
    PADDLE_ENFORCE_EQ(scope_.kids().empty(), true, "scope has child scopes");

    // get outputs from scope and compare them with expected_outs
    bool all_equal = true;
    for (auto &out_name : outputs_) {
      auto &out_tensor =
          scope_.FindVar(out_name)->template Get<framework::LoDTensor>();
      if (platform::is_gpu_place(place_)) {
        framework::TensorCopySync(out_tensor, platform::CPUPlace(), &cpu_out);
      } else {
        cpu_out = out_tensor;
      }
      auto *out_ptr = cpu_out.data<T>();
      size_t numel = static_cast<size_t>(framework::product(dims_));
      auto is_equal = std::equal(out_ptr, out_ptr + numel,
                                 expected_outs_[out_name].data());
      if (!is_equal) {
        all_equal = false;
        break;
      }
    }
    return all_equal;
  }

  virtual std::unique_ptr<framework::OperatorBase> CreateTestOp() = 0;
  virtual void ComputeExpectedOuts() = 0;
  virtual ~TestElementwiseOpGradGrad() {}

 protected:
  std::string op_type_;
  platform::Place place_;
  framework::DDim dims_;
  std::vector<std::string> inputs_;
  std::vector<std::string> outputs_;
  std::map<std::string, paddle::framework::LoDTensor *> in_out_tensors_;
  std::map<std::string, std::vector<T>> feed_datas_;
  std::map<std::string, std::vector<T>> expected_outs_;
  framework::Scope scope_;
};

} // namespace operators
} // namespace paddle