commit
5471e87f3d
@ -0,0 +1,27 @@
|
||||
# Collect the operator sources that follow the *_op.{h,cpp} naming scheme and
# add the Function framework files, which do not match that pattern.
file(GLOB h_files . *_op.h)
file(GLOB cpp_files . *_op.cpp)
set(h_files ${h_files} Function.h)
set(cpp_files ${cpp_files} Function.cpp)

# CUDA kernels are only globbed and compiled when WITH_GPU is set.
if(WITH_GPU)
  file(GLOB cu_files . *_op_gpu.cu)
  cuda_compile(cu_objs ${cu_files})
endif()

# cu_objs is only set in the WITH_GPU branch above.
add_library(paddle_function STATIC ${cpp_files} ${cu_objs})

# Shared gtest entry point for the function unit tests.
add_library(paddle_test_main STATIC TestMain.cpp)

# The unit test is registered only when WITH_GPU is set.
if(WITH_GPU)
  # TODO:
  # file(GLOB test_files . *_op_test.cpp)
  # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
  add_simple_unittest(cross_map_normal_op_test)
endif()

# Style checks for headers, sources and (GPU builds only) CUDA files.
add_style_check_target(paddle_function ${h_files})
add_style_check_target(paddle_function ${cpp_files})
if(WITH_GPU)
  add_style_check_target(paddle_function ${cu_files})
endif()
|
@ -0,0 +1,49 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "Function.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
template <>
|
||||
size_t FuncConfig::get<size_t>(const std::string& key) const {
|
||||
auto it = valueMap_.find(key);
|
||||
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
|
||||
return it->second.s;
|
||||
}
|
||||
|
||||
template <>
|
||||
real FuncConfig::get<real>(const std::string& key) const {
|
||||
auto it = valueMap_.find(key);
|
||||
CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
|
||||
return it->second.r;
|
||||
}
|
||||
|
||||
template <>
|
||||
FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
|
||||
CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
|
||||
valueMap_[key].s = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <>
|
||||
FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
|
||||
CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
|
||||
valueMap_[key].r = v;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Out-of-class definition of the static registry declared in FunctionBase.
ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,96 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "paddle/math/Matrix.h"
|
||||
#include "paddle/utils/ClassRegistrar.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
 * Compile-time device tag. It is used as a template argument to select
 * per-device specializations.
 */
enum DeviceType {
  DEVICE_TYPE_UNSPECIFIED = 0,  // no device selected
  DEVICE_TYPE_CPU = 1,          // CPU specializations
  DEVICE_TYPE_GPU = 2,          // GPU specializations
};
|
||||
|
||||
template <DeviceType Device>
|
||||
struct MatrixT;
|
||||
|
||||
template <>
|
||||
struct MatrixT<DEVICE_TYPE_CPU> {
|
||||
using type = CpuMatrix;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MatrixT<DEVICE_TYPE_GPU> {
|
||||
using type = GpuMatrix;
|
||||
};
|
||||
|
||||
// Extent of every dimension of a Tensor, one entry per dimension.
typedef std::vector<size_t> Dims;
|
||||
|
||||
class Tensor {
|
||||
public:
|
||||
Tensor(real* data, const Dims& dim) : buf_(data), dims_(dim) {}
|
||||
|
||||
real* getData() const { return buf_; }
|
||||
|
||||
real* buf_;
|
||||
Dims dims_;
|
||||
};
|
||||
|
||||
// Ordered list of tensors passed to FunctionBase::calc as inputs, outputs or inouts.
typedef std::vector<Tensor> Arguments;
|
||||
|
||||
class FuncConfig {
|
||||
public:
|
||||
union value {
|
||||
size_t s;
|
||||
real r;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T get(const std::string& key) const;
|
||||
|
||||
template <typename T>
|
||||
FuncConfig& set(const std::string& key, T v);
|
||||
|
||||
protected:
|
||||
std::map<std::string, value> valueMap_;
|
||||
};
|
||||
|
||||
class FunctionBase {
|
||||
public:
|
||||
virtual ~FunctionBase() {}
|
||||
|
||||
virtual void init(const FuncConfig& config) {}
|
||||
|
||||
virtual void calc(const Arguments& inputs,
|
||||
const Arguments& outputs,
|
||||
const Arguments& inouts) {}
|
||||
|
||||
static ClassRegistrar<FunctionBase> funcRegistrar_;
|
||||
};
|
||||
|
||||
/**
 * Builds the registry key of a function as the string literal
 * "<typeName>-<deviceName>", e.g. FUNC_NAME(CrossMapNormal, CPU) expands to
 * "CrossMapNormal" "-" "CPU".
 */
#define FUNC_NAME(typeName, deviceName) #typeName "-" #deviceName

/**
 * Defines a file-static InitFunction whose callback registers
 * className<DEVICE_TYPE_<deviceName>> in FunctionBase::funcRegistrar_ under
 * the key FUNC_NAME(typeName, deviceName).
 *
 * The generated variable name deliberately avoids a double underscore:
 * identifiers containing "__" are reserved for the implementation.
 */
#define REGISTER_TYPED_FUNC(typeName, deviceName, className)   \
  static InitFunction regType_##typeName##deviceName([]() {    \
    FunctionBase::funcRegistrar_                               \
        .registerClass<className<DEVICE_TYPE_##deviceName>>(   \
            FUNC_NAME(typeName, deviceName));                  \
  })
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,102 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "Function.h"
|
||||
#include "paddle/math/Vector.h"
|
||||
#include "paddle/math/tests/TensorCheck.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
class FunctionCompare {
|
||||
public:
|
||||
FunctionCompare(const std::string& name, const FuncConfig& config)
|
||||
: cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
|
||||
gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
|
||||
cpu->init(config);
|
||||
gpu->init(config);
|
||||
}
|
||||
|
||||
void cmpWithArg(const Arguments& inputs,
|
||||
const Arguments& outputs,
|
||||
const Arguments& inouts) {
|
||||
// init cpu and gpu arguments
|
||||
auto initArgs = [=](
|
||||
Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
|
||||
for (auto arg : inArgs) {
|
||||
size_t size = sizeof(real);
|
||||
for (auto dim : arg.dims_) {
|
||||
size *= dim;
|
||||
}
|
||||
cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
|
||||
gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
|
||||
cpuArgs.emplace_back(
|
||||
Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
|
||||
gpuArgs.emplace_back(
|
||||
Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
|
||||
|
||||
// will use an api to refactor this code.
|
||||
CpuVector cpuVector(size / sizeof(real),
|
||||
(real*)cpuArgs.back().getData());
|
||||
GpuVector gpuVector(size / sizeof(real),
|
||||
(real*)gpuArgs.back().getData());
|
||||
cpuVector.uniform(0.001, 1);
|
||||
gpuVector.copyFrom(cpuVector);
|
||||
}
|
||||
};
|
||||
initArgs(cpuInputs, gpuInputs, inputs);
|
||||
initArgs(cpuOutputs, gpuOutputs, outputs);
|
||||
initArgs(cpuInouts, gpuInouts, inouts);
|
||||
|
||||
// function calculate
|
||||
cpu->calc(cpuInputs, cpuOutputs, cpuInouts);
|
||||
gpu->calc(gpuInputs, gpuOutputs, gpuInouts);
|
||||
|
||||
// check outputs and inouts
|
||||
auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
|
||||
for (size_t i = 0; i < cpuArgs.size(); i++) {
|
||||
auto cpu = cpuArgs[i];
|
||||
auto gpu = gpuArgs[i];
|
||||
size_t size = 1;
|
||||
for (auto dim : cpu.dims_) {
|
||||
size *= dim;
|
||||
}
|
||||
CpuVector cpuVector(size, (real*)cpu.getData());
|
||||
GpuVector gpuVector(size, (real*)gpu.getData());
|
||||
|
||||
autotest::TensorCheckErr(cpuVector, gpuVector);
|
||||
}
|
||||
};
|
||||
checkArgs(cpuOutputs, gpuOutputs);
|
||||
checkArgs(cpuInouts, gpuInouts);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::shared_ptr<FunctionBase> cpu;
|
||||
std::shared_ptr<FunctionBase> gpu;
|
||||
std::vector<CpuMemHandlePtr> cpuMemory;
|
||||
std::vector<GpuMemHandlePtr> gpuMemory;
|
||||
Arguments cpuInputs;
|
||||
Arguments cpuOutputs;
|
||||
Arguments cpuInouts;
|
||||
Arguments gpuInputs;
|
||||
Arguments gpuOutputs;
|
||||
Arguments gpuInouts;
|
||||
};
|
||||
|
||||
} // namespace paddle
|
||||
|
||||
using paddle::FunctionCompare;
|
||||
using paddle::FuncConfig;
|
||||
using paddle::Dims;
|
||||
using paddle::Tensor;
|
@ -0,0 +1,22 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "paddle/utils/Util.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
paddle::initMain(argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
@ -0,0 +1,227 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "cross_map_normal_op.h"
|
||||
#include "paddle/math/Vector.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
template <>
|
||||
void CrossMapNormal<DEVICE_TYPE_CPU>(real* outputs,
|
||||
real* denoms,
|
||||
const real* inputs,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow) {
|
||||
size_t oneImage = height * width;
|
||||
size_t oneSample = channels * oneImage;
|
||||
|
||||
CpuVector outputsV(numSamples * oneSample, outputs);
|
||||
CpuVector inputsV(numSamples * oneSample, const_cast<real*>(inputs));
|
||||
CpuVector denomsV(numSamples * oneSample, denoms);
|
||||
|
||||
// f(x) = x * ( 1 + scale * SUM((x)^2) )^(-pow)
|
||||
// x represents inputs
|
||||
// f(x) represents outputs
|
||||
// denoms save the intermediate result for backward
|
||||
denomsV = denomsV.constant(1.0);
|
||||
const int start = -((int)size - 1) / 2;
|
||||
const int end = (int)size + start;
|
||||
for (size_t i = 0; i < numSamples; i++) {
|
||||
real* oneDenom = denoms + i * oneSample;
|
||||
real* oneInput = const_cast<real*>(inputs) + i * oneSample;
|
||||
for (int c = 0; c < (int)channels; c++) {
|
||||
CpuVector denom(oneImage, oneDenom + c * oneImage);
|
||||
for (int s = start; s < end; s++) {
|
||||
if (c + s >= 0 && c + s < (int)channels) {
|
||||
CpuVector input(oneImage, oneInput + (c + s) * oneImage);
|
||||
denom += input.square() * scale;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
outputsV = inputsV * denomsV.pow(-pow);
|
||||
}
|
||||
|
||||
template <>
|
||||
void CrossMapNormalGrad<DEVICE_TYPE_CPU>(real* inputsGrad,
|
||||
const real* inputsValue,
|
||||
const real* outputsValue,
|
||||
const real* outputsGrad,
|
||||
const real* denoms,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow) {
|
||||
size_t oneSample = channels * height * width;
|
||||
std::function<CpuVector(real*, size_t)> oneImage = [=](real* data,
|
||||
size_t offset) {
|
||||
return CpuVector(height * width, data + offset);
|
||||
};
|
||||
|
||||
const int start = -((int)size) / 2;
|
||||
const int end = (int)size + start;
|
||||
const real ratio = -(real)2 * scale * pow;
|
||||
for (size_t i = 0; i < numSamples; i++) {
|
||||
size_t sOffset = i * oneSample;
|
||||
real* oneInputGrad = inputsGrad + sOffset;
|
||||
real* oneInputValue = const_cast<real*>(inputsValue) + sOffset;
|
||||
real* oneDenom = const_cast<real*>(denoms) + sOffset;
|
||||
real* oneOutputGrad = const_cast<real*>(outputsGrad) + sOffset;
|
||||
real* oneOutputValue = const_cast<real*>(outputsValue) + sOffset;
|
||||
|
||||
for (int c = 0; c < (int)channels; c++) {
|
||||
size_t cOffset = c * height * width;
|
||||
CpuVector inputGrad = oneImage(oneInputGrad, cOffset);
|
||||
CpuVector inputValue = oneImage(oneInputValue, cOffset);
|
||||
CpuVector denom = oneImage(oneDenom, cOffset);
|
||||
CpuVector outputGrad = oneImage(oneOutputGrad, cOffset);
|
||||
|
||||
inputGrad = inputGrad + denom.pow(-pow) * outputGrad;
|
||||
for (int s = start; s < end; s++) {
|
||||
if (c + s >= 0 && c + s < (int)channels) {
|
||||
size_t offset = (c + s) * height * width;
|
||||
CpuVector output = oneImage(oneOutputValue, offset);
|
||||
CpuVector outputGrad = oneImage(oneOutputGrad, offset);
|
||||
CpuVector denom = oneImage(oneDenom, offset);
|
||||
|
||||
inputGrad += ((outputGrad * output * ratio) / denom) * inputValue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \param inputs[0] input value.
|
||||
* \param outputs[0] output value.
|
||||
* \param outputs[1] denoms.
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
class CrossMapNormalFunc : public FunctionBase {
|
||||
public:
|
||||
void init(const FuncConfig& config) override {
|
||||
size_ = config.get<size_t>("size");
|
||||
scale_ = config.get<real>("scale");
|
||||
pow_ = config.get<real>("pow");
|
||||
}
|
||||
|
||||
void calc(const Arguments& inputs,
|
||||
const Arguments& outputs,
|
||||
const Arguments& inouts) override {
|
||||
CHECK_EQ(1, inputs.size());
|
||||
CHECK_EQ(2, outputs.size());
|
||||
CHECK_EQ(0, inouts.size());
|
||||
|
||||
CHECK_EQ(inputs[0].dims_.size(), 4);
|
||||
for (size_t i = 0; i < inputs[0].dims_.size(); i++) {
|
||||
CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]);
|
||||
CHECK_EQ(inputs[0].dims_[i], outputs[1].dims_[i]);
|
||||
}
|
||||
|
||||
size_t samples = inputs[0].dims_[0];
|
||||
size_t channels = inputs[0].dims_[1];
|
||||
size_t height = inputs[0].dims_[2];
|
||||
size_t width = inputs[0].dims_[3];
|
||||
|
||||
CrossMapNormal<Device>(outputs[0].getData(),
|
||||
outputs[1].getData(),
|
||||
inputs[0].getData(),
|
||||
samples,
|
||||
channels,
|
||||
height,
|
||||
width,
|
||||
size_,
|
||||
scale_,
|
||||
pow_);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
real scale_;
|
||||
real pow_;
|
||||
};
|
||||
|
||||
/**
|
||||
* \param inputs[0] input value.
|
||||
* \param inputs[1] output value.
|
||||
* \param inputs[2] output grad.
|
||||
* \param inputs[3] denoms.
|
||||
* \param outputs[0] input grad.
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
class CrossMapNormalGradFunc : public FunctionBase {
|
||||
public:
|
||||
void init(const FuncConfig& config) override {
|
||||
size_ = config.get<size_t>("size");
|
||||
scale_ = config.get<real>("scale");
|
||||
pow_ = config.get<real>("pow");
|
||||
}
|
||||
|
||||
void calc(const Arguments& inputs,
|
||||
const Arguments& outputs,
|
||||
const Arguments& inouts) override {
|
||||
CHECK_EQ(4, inputs.size());
|
||||
CHECK_EQ(1, outputs.size());
|
||||
CHECK_EQ(0, inouts.size());
|
||||
|
||||
CHECK_EQ(inputs[0].dims_.size(), 4);
|
||||
for (size_t i = 0; i < inputs[0].dims_.size(); i++) {
|
||||
CHECK_EQ(inputs[0].dims_[i], inputs[1].dims_[i]);
|
||||
CHECK_EQ(inputs[0].dims_[i], inputs[2].dims_[i]);
|
||||
CHECK_EQ(inputs[0].dims_[i], inputs[3].dims_[i]);
|
||||
CHECK_EQ(inputs[0].dims_[i], outputs[0].dims_[i]);
|
||||
}
|
||||
|
||||
size_t samples = inputs[0].dims_[0];
|
||||
size_t channels = inputs[0].dims_[1];
|
||||
size_t height = inputs[0].dims_[2];
|
||||
size_t width = inputs[0].dims_[3];
|
||||
|
||||
CrossMapNormalGrad<Device>(outputs[0].getData(),
|
||||
inputs[0].getData(),
|
||||
inputs[1].getData(),
|
||||
inputs[2].getData(),
|
||||
inputs[3].getData(),
|
||||
samples,
|
||||
channels,
|
||||
height,
|
||||
width,
|
||||
size_,
|
||||
scale_,
|
||||
pow_);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t size_;
|
||||
real scale_;
|
||||
real pow_;
|
||||
};
|
||||
|
||||
// Register the CPU instantiations of the forward and backward functions.
REGISTER_TYPED_FUNC(CrossMapNormal, CPU, CrossMapNormalFunc);
|
||||
REGISTER_TYPED_FUNC(CrossMapNormalGrad, CPU, CrossMapNormalGradFunc);
|
||||
// The GPU instantiations are registered unless PADDLE_ONLY_CPU is defined.
#ifndef PADDLE_ONLY_CPU
|
||||
REGISTER_TYPED_FUNC(CrossMapNormal, GPU, CrossMapNormalFunc);
|
||||
REGISTER_TYPED_FUNC(CrossMapNormalGrad, GPU, CrossMapNormalGradFunc);
|
||||
#endif
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,81 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Function.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* \brief Cross map respose normalize forward.
|
||||
* The data structure of image data is NCHW.
|
||||
*
|
||||
* \param[out] outputs output data.
|
||||
* \param[in] denoms denoms buffer.
|
||||
* \param[in] inputs input data.
|
||||
* \param[in] numSamples batch size of input image.
|
||||
* \param[in] channels number of channel.
|
||||
* \param[in] height image height.
|
||||
* \param[in] width image width.
|
||||
* \param[in] size size.
|
||||
* \param[in] scale scale.
|
||||
* \param[in] pow scale.
|
||||
*
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
void CrossMapNormal(real* outputs,
|
||||
real* denoms,
|
||||
const real* inputs,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow);
|
||||
|
||||
/**
|
||||
* \brief Cross map respose normalize backward.
|
||||
* The data structure of image data is NCHW.
|
||||
*
|
||||
* \param[out] inputsGrad input grad.
|
||||
* \param[in] inputsValue input value.
|
||||
* \param[out] outputsValue output value.
|
||||
* \param[out] outputsGrad output grad.
|
||||
* \param[in] denoms denoms buffer.
|
||||
* \param[in] numSamples batch size of input image.
|
||||
* \param[in] channels number of channel.
|
||||
* \param[in] height image height.
|
||||
* \param[in] width image width.
|
||||
* \param[in] size size.
|
||||
* \param[in] scale scale.
|
||||
* \param[in] pow scale.
|
||||
*
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
void CrossMapNormalGrad(real* inputsGrad,
|
||||
const real* inputsValue,
|
||||
const real* outputsValue,
|
||||
const real* outputsGrad,
|
||||
const real* denoms,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow);
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,156 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "hl_base.h"
|
||||
#include "cross_map_normal_op.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
 * Fills `scale` with 1 + alpha * SUM(in^2) over a window of `size` channels.
 *
 * Each thread with idx < imageSize handles one (sample, row, column)
 * position and walks over all channels of that position, keeping a running
 * sum of squares: the newest channel is added and the channel that left the
 * window is subtracted.
 */
__global__ void KeCMRNormFillScale(size_t imageSize, const real* in,
                                   real* scale, size_t channels,
                                   size_t height, size_t width, size_t size,
                                   real alpha) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (idx < imageSize) {
    // Decompose the flat index into column, row and sample.
    const int w = idx % width;
    const int h = (idx / width) % height;
    const int n = idx / width / height;
    // Offset of channel 0 of this position.
    const int offset = (n * channels * height + h) * width + w;

    in += offset;
    scale += offset;
    const int step = height * width;  // distance between adjacent channels
    const int pre_pad = (size - 1) / 2;
    const int post_pad = size - pre_pad - 1;

    real accum = 0;
    for (int index = 0; index < channels + post_pad; ++index) {
      if (index < channels) {
        accum += in[index * step] * in[index * step];
      }
      if (index >= size) {
        accum -= in[(index - size) * step] * in[(index - size) * step];
      }
      if (index >= post_pad) {
        // Use a `real` constant: an unsuffixed `1.` is a double and would
        // force double-precision arithmetic when real is float.
        scale[(index - post_pad) * step] =
            static_cast<real>(1) + accum * alpha;
      }
    }
  }
}
|
||||
|
||||
/**
 * Element-wise output kernel: out[i] = in[i] * scale[i]^negative_beta for
 * every i < inputSize. Threads whose index is past the end do nothing.
 */
__global__ void KeCMRNormOutput(size_t inputSize, const real* in,
                                const real* scale, real negative_beta,
                                real* out) {
  const int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (!(index < inputSize)) {
    return;
  }
  out[index] = in[index] * pow(scale[index], negative_beta);
}
|
||||
|
||||
template <>
|
||||
void CrossMapNormal<DEVICE_TYPE_GPU>(real* outputs,
|
||||
real* denoms,
|
||||
const real* inputs,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow) {
|
||||
size_t imageSize = numSamples * height * width;
|
||||
int blockSize = 1024;
|
||||
int gridSize = (imageSize + 1024 - 1) / 1024;
|
||||
KeCMRNormFillScale<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
|
||||
(imageSize, inputs, denoms, channels, height, width, size, scale);
|
||||
|
||||
size_t inputSize = numSamples * height * width *channels;
|
||||
blockSize = 1024;
|
||||
gridSize = (inputSize + 1024 - 1) / 1024;
|
||||
KeCMRNormOutput<<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
|
||||
(inputSize, inputs, denoms, -pow, outputs);
|
||||
|
||||
CHECK_SYNC("CrossMapNormal");
|
||||
}
|
||||
|
||||
/**
 * Backward kernel. Each thread with idx < imageSize handles one (sample,
 * row, column) position and walks over its channels, keeping a running sum
 * of top_diff * top_data / scale over a window of `size` channels. For each
 * channel it ADDS to bottom_diff:
 *   top_diff * scale^negative_beta - cache_ratio * bottom_data * accum
 */
__global__ void KeCMRNormDiff(size_t imageSize, const real* bottom_data,
                              const real* top_data, const real* scale,
                              const real* top_diff, size_t channels,
                              size_t height, size_t width, size_t size,
                              real negative_beta, real cache_ratio,
                              real* bottom_diff) {
  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
  if (!(idx < imageSize)) {
    return;
  }

  // Decompose the flat index into column, row and sample.
  const int w = idx % width;
  const int h = (idx / width) % height;
  const int n = idx / width / height;
  // Offset of channel 0 of this position; applied to every buffer.
  const int base = (n * channels * height + h) * width + w;
  bottom_data += base;
  top_data += base;
  scale += base;
  top_diff += base;
  bottom_diff += base;

  const int step = height * width;  // distance between adjacent channels
  const int pre_pad = size - (size + 1) / 2;
  const int post_pad = size - pre_pad - 1;

  real accum = 0;
  for (int index = 0; index < channels + post_pad; ++index) {
    if (index < channels) {
      // Channel entering the window.
      const int head = index * step;
      accum += top_diff[head] * top_data[head] / scale[head];
    }
    if (index >= size) {
      // Channel leaving the window.
      const size_t tail = (index - size) * step;
      accum -= top_diff[tail] * top_data[tail] / scale[tail];
    }
    if (index >= post_pad) {
      const int out = (index - post_pad) * step;
      bottom_diff[out] +=
          top_diff[out] * pow(scale[out], negative_beta) -
          cache_ratio * bottom_data[out] * accum;
    }
  }
}
|
||||
|
||||
template <>
|
||||
void CrossMapNormalGrad<DEVICE_TYPE_GPU>(real* inputsGrad,
|
||||
const real* inputsValue,
|
||||
const real* outputsValue,
|
||||
const real* outputsGrad,
|
||||
const real* denoms,
|
||||
size_t numSamples,
|
||||
size_t channels,
|
||||
size_t height,
|
||||
size_t width,
|
||||
size_t size,
|
||||
real scale,
|
||||
real pow) {
|
||||
size_t imageSize = numSamples * height * width;
|
||||
|
||||
int blockSize = 1024;
|
||||
int gridSize = (imageSize + 1024 - 1) / 1024;
|
||||
KeCMRNormDiff <<<gridSize, blockSize, 0, STREAM_DEFAULT>>>
|
||||
(imageSize, inputsValue, outputsValue, denoms, outputsGrad, channels,
|
||||
height, width, size, -pow, 2.0f * pow * scale, inputsGrad);
|
||||
CHECK_SYNC("CrossMapNormalGrad");
|
||||
}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,71 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "FunctionTest.h"
|
||||
|
||||
// Compares the CPU and GPU forward implementations over a grid of batch
// sizes, channel counts, image sizes and window sizes.
TEST(CrossMapNormal, real) {
  for (size_t numSamples : {5, 32}) {
    for (size_t channels : {1, 5, 32}) {
      for (size_t imgSizeH : {5, 33, 100}) {
        for (size_t imgSizeW : {5, 32, 96}) {
          for (size_t size : {1, 2, 3, 5, 7}) {
            VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                    << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                    << " size=" << size;

            FuncConfig config;
            config.set("size", size)
                .set("scale", (real)1.5)
                .set("pow", (real)0.5);
            FunctionCompare compare("CrossMapNormal", config);

            // Only the dimensions matter; FunctionCompare allocates the data.
            const Dims shape{numSamples, channels, imgSizeH, imgSizeW};
            const Tensor placeholder(nullptr, shape);
            compare.cmpWithArg({placeholder},               // input value
                               {placeholder, placeholder},  // output, denoms
                               {});
          }
        }
      }
    }
  }
}
|
||||
|
||||
// Compares the CPU and GPU backward implementations over a grid of batch
// sizes, channel counts, image sizes and window sizes.
TEST(CrossMapNormalGrad, real) {
  for (size_t numSamples : {5, 32}) {
    for (size_t channels : {1, 5, 32}) {
      for (size_t imgSizeH : {5, 33, 100}) {
        for (size_t imgSizeW : {5, 32, 96}) {
          for (size_t size : {1, 2, 3, 5, 7}) {
            VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                    << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
                    << " size=" << size;

            FuncConfig config;
            config.set("size", size)
                .set("scale", (real)1.5)
                .set("pow", (real)0.5);
            FunctionCompare compare("CrossMapNormalGrad", config);

            // Only the dimensions matter; FunctionCompare allocates the data.
            const Dims shape{numSamples, channels, imgSizeH, imgSizeW};
            const Tensor placeholder(nullptr, shape);
            // Inputs: input value, output value, output grad, denoms.
            // Output: input grad.
            compare.cmpWithArg(
                {placeholder, placeholder, placeholder, placeholder},
                {placeholder},
                {});
          }
        }
      }
    }
  }
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue