Make leaky relu inplacable (#19676)
* make leaky relu inplacable, test=develop

* force add unittests to pass coverage, test=develop
parent c308c88d71
commit 0daa5c9772
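Context on why the in-place rewrite is safe (a sketch, not the Paddle implementation): for alpha > 0, leaky relu preserves the sign of its input, so the backward mask (out >= 0) equals (x >= 0). The gradient can therefore be recovered from Out alone, which lets the forward op overwrite X. A minimal standalone illustration (names are illustrative, not the Paddle API):

// For alpha > 0, out = x >= 0 ? x : alpha * x has the same sign as x,
// so the backward pass can mask on out and the input buffer may be reused.
float leaky_relu(float x, float alpha) { return x >= 0 ? x : alpha * x; }

float leaky_relu_grad(float out, float dout, float alpha) {
  return out >= 0 ? dout : alpha * dout;  // mask recovered from out alone
}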
@@ -0,0 +1,26 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h"

namespace paddle {
namespace operators {

TEST(leaky_relu_grad_grad, test_cpu) {
  ASSERT_TRUE(
      TestLeakyReluGradGradMain<float>({32, 64}, platform::CPUPlace(), 0.02));
}

}  // namespace operators
}  // namespace paddle
@@ -0,0 +1,26 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/test_leaky_relu_grad_grad_functor.h"

namespace paddle {
namespace operators {

TEST(leaky_relu_grad_grad, test_gpu) {
  ASSERT_TRUE(
      TestLeakyReluGradGradMain<float>({32, 64}, platform::CUDAPlace(0), 0.15));
}

}  // namespace operators
}  // namespace paddle
@@ -0,0 +1,124 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include <random>
#include "gtest/gtest.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/for_range.h"

namespace paddle {
namespace operators {

template <typename T>
static void InitRandom(framework::Tensor *tensor,
                       const platform::Place &place) {
  framework::Tensor cpu_tensor;
  auto *cpu_ptr =
      cpu_tensor.mutable_data<T>(tensor->dims(), platform::CPUPlace());
  int64_t numel = cpu_tensor.numel();
  std::mt19937 engine;
  std::uniform_real_distribution<T> dist(static_cast<T>(-2.0),
                                         static_cast<T>(2.0));
  for (int64_t i = 0; i < numel; ++i) {
    cpu_ptr[i] = dist(engine);
  }
  framework::TensorCopySync(cpu_tensor, place, tensor);
}

template <typename T>
struct LeakyReluGradGradEachElementFunctor {
  LeakyReluGradGradEachElementFunctor(const T *ddx, const T *out, T alpha,
                                      T *ddout)
      : ddx_(ddx), out_(out), alpha_(alpha), ddout_(ddout) {}

  HOSTDEVICE void operator()(int idx) {
    if (out_[idx] >= 0) {
      ddout_[idx] = ddx_[idx];
    } else {
      ddout_[idx] = ddx_[idx] * alpha_;
    }
  }

  const T *ddx_;
  const T *out_;
  T alpha_;
  T *ddout_;
};

template <typename T>
static bool TestLeakyReluGradGradMain(const framework::DDim &dim,
                                      const platform::Place &place,
                                      float alpha) {
  LeakyReluGradGradFunctor<T> functor;
  functor.alpha = alpha;
  auto &dev_ctx = *platform::DeviceContextPool::Instance().Get(place);
  framework::Tensor *x = nullptr;
  framework::Tensor *dout = nullptr;
  framework::Tensor *dx = nullptr;

  framework::Tensor out;
  out.Resize(dim);
  InitRandom<T>(&out, place);

  framework::Tensor ddx;
  ddx.Resize(dim);
  InitRandom<T>(&ddx, place);

  framework::Tensor ddout;
  ddout.Resize(dim);
  InitRandom<T>(&ddout, place);

  framework::Tensor ddout_actual;
  ddout_actual.mutable_data<T>(dim, place);
  LeakyReluGradGradEachElementFunctor<T> actual_functor(
      ddx.data<T>(), out.data<T>(), static_cast<T>(alpha),
      ddout_actual.data<T>());

  int64_t limit = out.numel();

#ifdef __NVCC__
  if (platform::is_gpu_place(place)) {
    auto &cuda_dev_ctx = dynamic_cast<platform::CUDADeviceContext &>(dev_ctx);
    functor(cuda_dev_ctx, x, &out, &ddx, &ddout, dout, dx);
    platform::ForRange<platform::CUDADeviceContext> for_range(cuda_dev_ctx,
                                                              limit);
    for_range(actual_functor);
  } else {
#endif
    auto &cpu_dev_ctx = dynamic_cast<platform::CPUDeviceContext &>(dev_ctx);
    functor(cpu_dev_ctx, x, &out, &ddx, &ddout, dout, dx);
    platform::ForRange<platform::CPUDeviceContext> for_range(cpu_dev_ctx,
                                                             limit);
    for_range(actual_functor);
#ifdef __NVCC__
  }
#endif

  dev_ctx.Wait();

  framework::Tensor ddout_cpu, ddout_actual_cpu;
  framework::TensorCopySync(ddout, platform::CPUPlace(), &ddout_cpu);
  framework::TensorCopySync(ddout_actual, platform::CPUPlace(),
                            &ddout_actual_cpu);

  bool is_equal = std::equal(ddout_cpu.data<T>(), ddout_cpu.data<T>() + limit,
                             ddout_actual_cpu.data<T>());
  return is_equal;
}

}  // namespace operators
}  // namespace paddle
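The element-wise rule these tests exercise is the second-order gradient of leaky relu: ddout = ddx where out >= 0, and ddout = alpha * ddx otherwise. A self-contained sketch of the same check, with Paddle tensors replaced by plain vectors (all names here are illustrative, not part of the diff):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <random>
#include <vector>

// Reference rule: forward ddx unchanged where out is non-negative,
// scale it by alpha elsewhere.
std::vector<float> LeakyReluGradGradRef(const std::vector<float> &ddx,
                                        const std::vector<float> &out,
                                        float alpha) {
  std::vector<float> ddout(ddx.size());
  for (std::size_t i = 0; i < ddx.size(); ++i) {
    ddout[i] = out[i] >= 0.0f ? ddx[i] : ddx[i] * alpha;
  }
  return ddout;
}

int main() {
  // Mirror the test setup: 32 * 64 uniform values in [-2, 2], alpha = 0.02.
  std::mt19937 engine;
  std::uniform_real_distribution<float> dist(-2.0f, 2.0f);
  const std::int64_t numel = 32 * 64;
  const float alpha = 0.02f;
  std::vector<float> out(numel), ddx(numel);
  for (std::int64_t i = 0; i < numel; ++i) {
    out[i] = dist(engine);
    ddx[i] = dist(engine);
  }

  std::vector<float> expected = LeakyReluGradGradRef(ddx, out, alpha);

  // Branchless variant standing in for the fused functor under test.
  std::vector<float> actual(numel);
  for (std::int64_t i = 0; i < numel; ++i) {
    actual[i] = ddx[i] * (out[i] >= 0.0f ? 1.0f : alpha);
  }

  // Exact comparison, as the test does with std::equal: both paths run the
  // same float multiply, so no tolerance is needed.
  assert(std::equal(expected.begin(), expected.end(), actual.begin()));
  return 0;
}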