parent 94d83fcd98
commit cb6436b50c
@@ -0,0 +1,172 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "RowConvOp.h"
#include "paddle/math/Vector.h"

namespace paddle {

template <>
void RowConv<DEVICE_TYPE_CPU>(CpuMatrix& out,
                              const CpuMatrix& in,
                              const CpuMatrix& filter,
                              const CpuIVector& seq) {
  const int* starts = seq.getData();
  const size_t numSeq = seq.getSize() - 1;
  const size_t contextLength = filter.getHeight();
  for (size_t i = 0; i < numSeq; ++i) {
    size_t begin = starts[i];
    size_t end = starts[i + 1];
    for (size_t j = begin; j < end; ++j) {
      MatrixPtr x;
      MatrixPtr w;
      if ((j + contextLength) < end) {
        x = (const_cast<CpuMatrix&>(in)).subMatrix(j, contextLength);
        w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, contextLength);
      } else {
        // Near the end of a sequence the look-ahead window is clipped.
        x = (const_cast<CpuMatrix&>(in)).subMatrix(j, end - j);
        w = (const_cast<CpuMatrix&>(filter)).subMatrix(0, end - j);
      }
      MatrixPtr y = out.subMatrix(j, 1);
      y->addDotMulVMM(*x, *w);
    }
  }
}
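// For reference: each output row j within a sequence is the sum, over the
// look-ahead window, of the elementwise product of a filter row and an input
// row,
//
//   out[j] = \sum_{t=0}^{C-1} filter[t] .* in[j + t],
//
// with the window clipped at the sequence end. Worked example (width 1,
// C = 2, filter = [1, 0.5], one sequence in = [1, 2, 3]):
//   out = [1 + 0.5*2, 2 + 0.5*3, 3] = [2, 3.5, 3].
//
// A minimal standalone sketch of the same loop nest on plain row-major float
// buffers (rowConvRef and its parameter names are illustrative, not part of
// this patch):
static void rowConvRef(float* out, const float* in, const float* filter,
                       const int* starts, size_t numSeq,
                       size_t context, size_t width) {
  for (size_t i = 0; i < numSeq; ++i) {
    for (int j = starts[i]; j < starts[i + 1]; ++j) {
      size_t steps = static_cast<size_t>(starts[i + 1] - j);
      if (steps > context) steps = context;  // clip the window at the end
      for (size_t t = 0; t < steps; ++t) {
        for (size_t d = 0; d < width; ++d) {
          out[j * width + d] +=
              filter[t * width + d] * in[(j + t) * width + d];
        }
      }
    }
  }
}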

template <>
void RowConvGrad<DEVICE_TYPE_CPU>(const CpuMatrix& outG,
                                  const CpuMatrix& in,
                                  const CpuMatrix& filter,
                                  CpuMatrix& inG,
                                  CpuMatrix& filterG,
                                  const CpuIVector& seq) {
  // gradient w.r.t filter
  const int* starts = seq.getData();
  const size_t numSeq = seq.getSize() - 1;
  const size_t contextLength = filter.getHeight();
  if (filterG) {
    for (size_t i = 0; i < numSeq; ++i) {
      size_t begin = starts[i];
      size_t end = starts[i + 1];
      size_t steps = end - begin;
      // The "j < steps" guard protects sequences shorter than the context
      // window, where "steps - j" would otherwise underflow.
      for (size_t j = 0; j < contextLength && j < steps; ++j) {
        MatrixPtr x =
            (const_cast<CpuMatrix&>(in)).subMatrix(begin + j, steps - j);
        MatrixPtr dy =
            (const_cast<CpuMatrix&>(outG)).subMatrix(begin, steps - j);
        MatrixPtr dw = filterG.subMatrix(j, 1);
        dw->addDotMulVMM(*dy, *x);
      }
    }
  }

  // gradient w.r.t input feature
  if (inG) {
    for (size_t i = 0; i < numSeq; ++i) {
      size_t begin = starts[i];
      size_t end = starts[i + 1];
      size_t steps = end - begin;
      for (size_t j = 0; j < steps; ++j) {
        MatrixPtr dx = inG.subMatrix(begin + j, 1);
        for (size_t t = 0; t < contextLength; ++t) {
          if ((int(j) - int(t)) >= 0) {
            MatrixPtr dy =
                (const_cast<CpuMatrix&>(outG)).subMatrix(begin + j - t, 1);
            MatrixPtr w = (const_cast<CpuMatrix&>(filter)).subMatrix(t, 1);
            dx->addDotMul(*dy, *w, 1.0, 1.0);
          }
        }
      }
    }
  }
}
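// The two passes above implement, per sequence (indices relative to begin):
//
//   dFilter[t] += \sum_j dOut[j] .* in[j + t]          (filter gradient)
//   dIn[j]     += \sum_{t <= j} filter[t] .* dOut[j - t]   (input gradient)
//
// i.e. the adjoints of the forward sum out[j] = \sum_t filter[t] .* in[j + t].
// A standalone sketch of the input-gradient pass on plain row-major buffers
// (names illustrative, matching rowConvRef above):
static void rowConvGradInputRef(float* inG, const float* outG,
                                const float* filter, const int* starts,
                                size_t numSeq, size_t context, size_t width) {
  for (size_t i = 0; i < numSeq; ++i) {
    for (int j = starts[i]; j < starts[i + 1]; ++j) {
      // Row j of the input contributed to out[j - t] with filter row t,
      // for every t that keeps j - t inside the sequence.
      for (size_t t = 0;
           t < context && j - static_cast<int>(t) >= starts[i];
           ++t) {
        for (size_t d = 0; d < width; ++d) {
          inG[j * width + d] +=
              filter[t * width + d] *
              outG[(j - static_cast<int>(t)) * width + d];
        }
      }
    }
  }
}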

/**
 * \brief The forward Function of the row convolution: for each output row,
 *        accumulate the dot-multiplication of the filter rows with the
 *        input rows inside a look-ahead window of contextLength rows,
 *        clipped at the end of each sequence.
 */
template <DeviceType Device>
class RowConvFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {}

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    // check
    CHECK_EQ(2UL, inputs.size());
    CHECK_EQ(1UL, outputs.size());
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
    CHECK(inputs[0].isSequenceArg() && outputs[0].isSequenceArg())
        << "SequenceArg required here.";
    const auto in = dynamic_cast<const SequenceArg&>(inputs[0]);
    auto out = dynamic_cast<const SequenceArg&>(outputs[0]);
    auto w = inputs[1];
    CHECK(in.data() && out.data() && in.getSequenceId().data());
    CHECK_EQ(in.shape().ndims(), 2UL);
    CHECK_EQ(out.shape().ndims(), 2UL);
    CHECK_EQ(in.shape()[1], out.shape()[1]);
    CHECK_EQ(in.shape()[0], out.shape()[0]);
    CHECK_EQ(w.shape()[1], in.shape()[1]);

    auto outMat = out.matrix<Device>();
    const auto inMat = in.matrix<Device>();
    const auto wMat = w.matrix<Device>();
    const auto seqId = in.getSequenceId().vector<int, Device>();

    RowConv<Device>(outMat, inMat, wMat, seqId);
  }
};
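// Argument contract enforced by the checks in calc above:
//   inputs[0]  - SequenceArg: input data (H x W) plus the numSeq + 1
//                sequence start offsets;
//   inputs[1]  - the filter, (contextLength x W);
//   outputs[0] - SequenceArg: output data (H x W) with ADD_TO semantics,
//                so the kernel accumulates into the output rather than
//                assigning to it.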

/**
 * \brief The backward Function of the row convolution. It computes the
 *        gradients w.r.t the filter and the input feature; either gradient
 *        is skipped when its output buffer is empty.
 */
template <DeviceType Device>
class RowConvGradFunc : public FunctionBase {
public:
  void init(const FuncConfig& config) override {}

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
    const auto outGrad = dynamic_cast<const SequenceArg&>(inputs[0]);
    const auto in = dynamic_cast<const SequenceArg&>(inputs[1]);
    const auto w = inputs[2];
    auto inGrad = dynamic_cast<const SequenceArg&>(outputs[0]);
    auto wGrad = outputs[1];

    const auto outGMat = outGrad.matrix<Device>();
    const auto inMat = in.matrix<Device>();
    const auto wMat = w.matrix<Device>();
    // Pass empty matrices for gradients that are not requested.
    auto inGMat = inGrad.data()
                      ? inGrad.matrix<Device>()
                      : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
    auto wGMat = wGrad.data()
                     ? wGrad.matrix<Device>()
                     : typename Tensor<real, Device>::Matrix(nullptr, 0, 0);
    const auto seqId = in.getSequenceId().vector<int, Device>();

    RowConvGrad<Device>(outGMat, inMat, wMat, inGMat, wGMat, seqId);
  }
};

REGISTER_TYPED_FUNC(RowConv, CPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, CPU, RowConvGradFunc);
#ifndef PADDLE_ONLY_CPU
REGISTER_TYPED_FUNC(RowConv, GPU, RowConvFunc);
REGISTER_TYPED_FUNC(RowConvGrad, GPU, RowConvGradFunc);
#endif

}  // namespace paddle
@@ -0,0 +1,42 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Function.h"

namespace paddle {

/**
 * \brief The forward of the row convolution: for each output row,
 *        accumulate the dot-multiplication of the filter rows with the
 *        input rows inside a look-ahead window, clipped at the end of
 *        each sequence.
 *
 * \param[out] out     the output matrix, (height x width).
 * \param[in]  in      the input matrix, (height x width).
 * \param[in]  filter  the filter, (contextLength x width).
 * \param[in]  seq     the numSeq + 1 sequence start positions.
 */
template <DeviceType DType>
void RowConv(typename Tensor<real, DType>::Matrix& out,
             const typename Tensor<real, DType>::Matrix& in,
             const typename Tensor<real, DType>::Matrix& filter,
             const typename Tensor<int, DType>::Vector& seq);

/**
 * \brief The backward of the row convolution: compute the gradients w.r.t
 *        the input feature (inG) and the filter (filterG); a gradient
 *        whose buffer is empty is skipped.
 *
 * \param[in]      outG     the gradient w.r.t the output.
 * \param[in]      in       the forward input matrix.
 * \param[in]      filter   the filter.
 * \param[in,out]  inG      the gradient w.r.t the input, accumulated.
 * \param[in,out]  filterG  the gradient w.r.t the filter, accumulated.
 * \param[in]      seq      the numSeq + 1 sequence start positions.
 */
template <DeviceType DType>
void RowConvGrad(const typename Tensor<real, DType>::Matrix& outG,
                 const typename Tensor<real, DType>::Matrix& in,
                 const typename Tensor<real, DType>::Matrix& filter,
                 typename Tensor<real, DType>::Matrix& inG,
                 typename Tensor<real, DType>::Matrix& filterG,
                 const typename Tensor<int, DType>::Vector& seq);
}  // namespace paddle
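// Note: the CPU specializations of both templates are defined in
// RowConvOp.cpp above; a GPU build is expected to supply matching
// DEVICE_TYPE_GPU specializations (presumably in a companion .cu file that
// is not part of this patch).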
@@ -0,0 +1,105 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "RowConvLayer.h"
#include "paddle/utils/Stat.h"

namespace paddle {

REGISTER_LAYER(row_conv, RowConvLayer);

bool RowConvLayer::init(const LayerMap& layerMap,
                        const ParameterMap& parameterMap) {
  /* Initialize the basic parent class */
  Layer::init(layerMap, parameterMap);

  contexLength_ = config_.inputs(0).row_conv_conf().context_length();

  CHECK_EQ(inputLayers_.size(), 1UL);
  weight_.reset(new Weight(contexLength_, getSize(), parameters_[0]));
  createFunction(forward_, "RowConv", FuncConfig());
  createFunction(backward_, "RowConvGrad", FuncConfig());

  return true;
}
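// Illustrative example: with context_length = 3 and getSize() = 256, weight_
// is a 3 x 256 matrix whose row t holds the per-dimension coefficients
// applied to the input row t steps ahead of the current one.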

void RowConvLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr input = getInputValue(0);
  size_t height = input->getHeight();
  size_t width = input->getWidth();
  CHECK_EQ(width, getSize());
  resetOutput(height, width);

  const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);
  wDims_ = TensorShape({contexLength_, width});

  MatrixPtr outV = getOutputValue();
  BufferArgs inputs;
  BufferArgs outputs;
  inputs.addArg(*getInputValue(0), *startPos);
  inputs.addArg(*weight_->getW(), wDims_);
  outputs.addArg(*outV, *startPos, ADD_TO);

  {
    REGISTER_TIMER_INFO("RowConvForward", getName().c_str());
    forward_[0]->calc(inputs, outputs);
  }

  /* activation */ {
    REGISTER_TIMER_INFO("FwAtvTimer", getName().c_str());
    forwardActivation();
  }
}
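// The forward wires up: inputs[0] = the input sequence with its start
// positions, inputs[1] = the (contexLength_ x width) filter described by
// wDims_, and outputs[0] = the output sequence with ADD_TO semantics,
// matching the argument contract checked in RowConvFunc::calc.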

void RowConvLayer::backward(const UpdateCallback& callback) {
  /* Do derivation */ {
    REGISTER_TIMER_INFO("BpAvtTimer", getName().c_str());
    backwardActivation();
  }

  const auto startPos = getInput(0).sequenceStartPositions->getVector(useGpu_);

  BufferArgs inputs;
  BufferArgs outputs;
  inputs.addArg(*getOutputGrad(), *startPos);
  inputs.addArg(*getInputValue(0), *startPos);
  // The filter is passed with its shape, as in forward (the original patch
  // mistakenly passed the sequence start positions here).
  inputs.addArg(*weight_->getW(), wDims_);

  MatrixPtr inGrad = getInputGrad(0);
  MatrixPtr wGrad = weight_->getWGrad();
  size_t h = getInputValue(0)->getHeight();
  size_t w = getInputValue(0)->getWidth();
  // Empty (nullptr-backed) matrices mark gradients that need not be computed.
  outputs.addArg(
      inGrad ? (*inGrad) : *(Matrix::create(nullptr, h, w, false, useGpu_)),
      *startPos,
      ADD_TO);
  outputs.addArg(
      wGrad ? (*wGrad)
            : *(Matrix::create(nullptr, contexLength_, w, false, useGpu_)),
      wDims_,
      ADD_TO);

  {
    REGISTER_TIMER_INFO("RowConvBackward", getName().c_str());
    backward_[0]->calc(inputs, outputs);
  }

  {
    REGISTER_TIMER_INFO("WeightUpdate", getName().c_str());
    weight_->getParameterPtr()->incUpdate(callback);
  }
}

}  // namespace paddle
@@ -0,0 +1,46 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "Layer.h"

namespace paddle {

/**
 * \brief Row Convolution Layer.
 */
class RowConvLayer : public Layer {
public:
  explicit RowConvLayer(const LayerConfig& config) : Layer(config) {}

  ~RowConvLayer() {}

  bool init(const LayerMap& layerMap,
            const ParameterMap& parameterMap) override;
  void forward(PassType passType) override;
  void backward(const UpdateCallback& callback = nullptr) override;

protected:
  // Row convolution weight of shape contexLength_ x fanOut,
  // where fanOut is the size of the output feature.
  std::unique_ptr<Weight> weight_;

  // std::unique_ptr<Weight> biases_;

  // How many future steps (the context length) to look ahead.
  size_t contexLength_;
  TensorShape wDims_;
};
}  // namespace paddle