// Paddle/paddle/gserver/layers/ExpandConvTransLayer.cpp
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include "ExpandConvTransLayer.h"

/* The implementation of the convTransLayer is basically a swap of the forward
 * and backward passes of the original convLayer. The variable naming follows
 * the convention of the convLayer.
 */
namespace paddle {
REGISTER_LAYER(exconvt, ExpandConvTransLayer);
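
// GEMM tile sizes per input, mirroring ExpandConvLayer with the roles of image
// and feature map swapped (shapes as implied by init() and the mul() calls
// below; square maps are assumed throughout):
//   subM = channels / groups                   rows of one input-group slice
//   subN = output_x * output_x                 pixels of the (small) input map
//   subK = numFilters * filterSize^2 / groups  rows of the im2col buffer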
bool ExpandConvTransLayer::init(const LayerMap &layerMap,
                                const ParameterMap &parameterMap) {
  /* Initialize the basic convolutional parent class */
  ConvBaseLayer::init(layerMap, parameterMap);

  /* Initialize the projection */
  for (auto &inputConfig : config_.inputs()) {
    const ConvConfig &conf = inputConfig.conv_conf();
    subM_.push_back(conf.channels() / conf.groups());
    subN_.push_back(conf.output_x() * conf.output_x());
    subK_.push_back(numFilters_ * conf.filter_size() * conf.filter_size() /
                    conf.groups());
    /* Use a consistent caffe mode across multiple inputs */
    caffeMode_ = conf.caffe_mode();
  }
  return true;
}
// Why is this necessary after calling init?
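// The transposed layer recovers the image size that would have produced the
// input feature map under an ordinary convolution. As a sketch, assuming
// caffe-style convolution arithmetic:
//   imgSize = (output - 1) * stride + filterSize - 2 * padding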
size_t ExpandConvTransLayer::getSize() {
  CHECK_NE(inputLayers_.size(), 0UL);
  imgSizeH_.clear();
  imgSizeW_.clear();
  outputH_.clear();
  outputW_.clear();
  subN_.clear();
  size_t layerSize = 0;
  for (size_t i = 0; i < inputLayers_.size(); i++) {
    outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
    outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
    if (outputH_[i] == 0) outputH_[i] = outputX_[i];
    if (outputW_[i] == 0) outputW_[i] = outputX_[i];
    imgSizeH_.push_back(
        imageSize(outputH_[i], filterSize_[i], padding_[i], stride_[i]));
    imgSizeW_.push_back(
        imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
    subN_.push_back(outputH_[i] * outputW_[i]);
    CHECK(layerSize == 0 ||
          imgSizeH_[i] * imgSizeW_[i] * (size_t)numFilters_ == layerSize);
    layerSize = imgSizeH_[i] * imgSizeW_[i] * numFilters_;
  }
  getOutput().setFrameHeight(imgSizeH_[0]);
  getOutput().setFrameWidth(imgSizeW_[0]);
  return layerSize;
}
void ExpandConvTransLayer::resetExpandInput(size_t height, size_t width) {
  Matrix::resizeOrCreate(expandInput_, height, width, false, useGpu_);
}
/*void ExpandConvTransLayer::resetConvOutput(size_t batchSize, int inIdx) {
  Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx],
                         false, useGpu_);
}*/
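
// Expand (im2col) one sample of the large image: every filterSize x filterSize
// patch that an ordinary convolution would read becomes one column of
// expandInput_, giving a (subK * groups) x subN buffer.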
void ExpandConvTransLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                          int inIdx) {
  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);
  real *imgData = image->getData() + startIdx * image->getWidth();
  MatrixPtr imageTmp = Matrix::create(
      imgData, 1, imgSizeH_[inIdx] * imgSizeW_[inIdx] * numFilters_, false,
      useGpu_);
  expandInput_->convExpand(*imageTmp, imgSizeH_[inIdx], imgSizeW_[inIdx],
                           numFilters_, filterSize_[inIdx], filterSize_[inIdx],
                           stride_[inIdx], stride_[inIdx], padding_[inIdx],
                           padding_[inIdx], outputH_[inIdx], outputW_[inIdx]);
  imageTmp->clear();
}
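
// Grad w.r.t. this layer's input is the *forward* GEMM of an ordinary
// convolution: per group, prevGrad(subM x subN) += W'(subM x subK) *
// expandedImageGrad(subK x subN).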
void ExpandConvTransLayer::expandBackOnce(MatrixPtr imageGrad, int inIdx,
                                          int startIdx) {
  int subM = subM_[inIdx];
  int subN = subN_[inIdx];
  int subK = subK_[inIdx];

  LayerPtr prevLayer = getPrev(inIdx);
  if (NULL == prevLayer->getOutputGrad()) {
    return;
  }

  expandOneFrame(imageGrad, startIdx, inIdx);

  real *outGradData = prevLayer->getOutputGrad()->getData() +
                      startIdx * subN * channels_[inIdx];
  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outGradData, subM, subN, false, useGpu_);
    C->mul(A, B, 1, 1);
    A->clear();
    B->clear();
    C->clear();

    wgtData += subK * subM;
    expInData += subK * subN;
    outGradData += subM * subN;
  }
}
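
// Forward of the transposed convolution is the backward-data pass of an
// ordinary convolution: multiply each input map by the filters and scatter
// (col2im) the result into the larger output image.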
void ExpandConvTransLayer::forward(PassType passType) {
  Layer::forward(passType);

  /* malloc memory for the output_ if necessary */
  /* note: one sample corresponds to one column, and the
   * transOutValue_ holds one sample per row */
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  resetOutput(batchSize, getSize());

  MatrixPtr output = nullptr;
  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    LayerPtr prevLayer = getPrev(i);
    output = prevLayer->getOutputValue();
    REGISTER_TIMER_INFO("shrinkFwd", getName().c_str());
    shrinkFwd(output, i);
  }

  /* add the bias-vector */
  if (biases_.get() != NULL) {
    if (sharedBiases_) {
      addSharedBias();
    } else {
      addUnsharedBias();
    }
  }

  /* activation */
  forwardActivation();
}
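
// Per sample and group: C(subK x subN) = W(subK x subM) * input(subM x subN)
// fills the (subK * groups) x subN im2col buffer, which convShrink then
// accumulates (col2im) into the numFilters-channel output image.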
void ExpandConvTransLayer::shrinkFwd(MatrixPtr output, int inpIdx) {
  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];

  size_t batchSize = output->getHeight();
  MatrixPtr image = getOutputValue();

  /* reset the expand-grad memory */
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *localData = output->getData();
  real *imageData = image->getData();
  for (size_t n = 0; n < batchSize; n++) {
    real *wgtData = weights_[inpIdx]->getW()->getData();
    real *expandInData = expandInput_->getData();

    for (int g = 0; g < groups_[inpIdx]; g++) {
      // create temporary matrices for one group
      MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(localData, subM, subN, false, useGpu_);
      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
      C->mul(A, B);  // C = A * B

      // clear the temporary matrices
      A->clear();
      B->clear();
      C->clear();

      expandInData += subK * subN;
      localData += subM * subN;
      wgtData += subK * subM;
    }

    // shrink (col2im) one frame into the output image
    MatrixPtr oneTmp = Matrix::create(
        expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
    MatrixPtr vTmp = Matrix::create(
        imageData, 1, imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * numFilters_,
        false, useGpu_);
    vTmp->convShrink(*oneTmp, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
                     numFilters_, filterSize_[inpIdx], filterSize_[inpIdx],
                     stride_[inpIdx], stride_[inpIdx], padding_[inpIdx],
                     padding_[inpIdx], outputH_[inpIdx], outputW_[inpIdx],
                     1.0f, 1.0f);
    vTmp->clear();
    oneTmp->clear();

    // move the data pointer to the next sample
    imageData += imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * numFilters_;
  }
}
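
// A shared bias is one scalar per output channel: transpose the output grad to
// pixel-major order, view it as (pixels * batch) x numFilters, and sum each
// column into the corresponding bias grad.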
void ExpandConvTransLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
  size_t mapW = getSize() / numFilters_;
  size_t mapH = v->getElementCnt() / mapW;
  MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  vTmp->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / numFilters_,
                          numFilters_);
  biases->collectBias(*transOutValue_, 1.0f);
}
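
// Unshared biases carry one scalar per output element, so the grad can be
// summed column-wise from v directly, without the transpose detour above.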
void ExpandConvTransLayer::bpropBiases(MatrixPtr v) {
  MatrixPtr biases =
      Matrix::create(biases_->getWGrad()->getData(), 1,
                     biases_->getWGrad()->getElementCnt(), false, useGpu_);
  if (sharedBiases_) {
    bpropSharedBias(biases, v);
  } else {
    biases->collectBias(*v, 1.0f);
  }
  biases->clear();
}
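
// Backward of the transposed convolution mirrors an ordinary convolution's
// forward pass: the output-image grad is im2col-expanded frame by frame and
// multiplied against the weights (input grad) and against the input values
// (weight grad).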
void ExpandConvTransLayer::backward(const UpdateCallback &callback) {
  backwardActivation();

  MatrixPtr imageGrad = getOutputGrad();
  if (biases_ && biases_->getWGrad()) {
    bpropBiases(imageGrad);
    /* Notify the parameter of one more gradient update */
    biases_->getParameterPtr()->incUpdate(callback);
  }

  for (size_t i = 0; i != inputLayers_.size(); ++i) {
    /* First, calculate the input layers' error */
    for (size_t off = 0; off < imageGrad->getHeight(); off++) {
      expandBackOnce(imageGrad, i, off);
    }
    if (weights_[i]->getWGrad()) {
      /* Then, calculate the W-gradient for the current layer */
      bpropWeights(imageGrad, i);
      /* Notify the parameter of one more gradient update */
      weights_[i]->getParameterPtr()->incUpdate(callback);
    }
  }
}
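
// Weight grad per group: dW(subK x subM) += expandedImageGrad(subK x subN) *
// input'(subN x subM), accumulated over the batch via mul(A, B, 1, 1).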
void ExpandConvTransLayer::bpropWeights(MatrixPtr v, int inpIdx) {
  MatrixPtr weightGrad = weights_[inpIdx]->getWGrad();
  MatrixPtr outputV = getPrev(inpIdx)->getOutputValue();

  int subM = subM_[inpIdx];
  int subN = subN_[inpIdx];
  int subK = subK_[inpIdx];
  size_t batchSize = outputV->getHeight();
  resetExpandInput(subK * groups_[inpIdx], subN);

  real *outputData = outputV->getData();
  for (size_t n = 0; n < batchSize; n++) {  // frame by frame
    // expand one frame of the output-image grad
    expandOneFrame(v, n, inpIdx);
    real *wGradData = weightGrad->getData();
    real *expandInData = expandInput_->getData();

    // expand-mul one group after another
    for (int g = 0; g < groups_[inpIdx]; g++) {
      MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(outputData, subM, subN, true, useGpu_);
      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
      C->mul(A, B, 1, 1);
      A->clear();
      B->clear();
      C->clear();

      outputData += subM * subN;
      wGradData += subK * subM;
      expandInData += subK * subN;
    }
  }
}
} // namespace paddle