You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
306 lines
10 KiB
306 lines
10 KiB
/* Copyright (c) 2016 Baidu, Inc. All Rights Reserve.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License. */
|
|
|
|
|
|
#include "paddle/utils/Logging.h"
|
|
#include "paddle/utils/Stat.h"
|
|
#include "ExpandConvTransLayer.h"
|
|
|
|
/* The implementation of the convTransLayer is basically a swap of forward and
|
|
* backward of the original convLayer.
|
|
* The variable naming follows the convention of the convLayer.
|
|
* */
|
|
|
|
namespace paddle {
|
|
|
|
// NOTE(review): REGISTER_LAYER is defined outside this file; presumably this
// makes ExpandConvTransLayer constructible from a layer config whose type is
// "exconvt" -- confirm against the macro definition.
REGISTER_LAYER(exconvt, ExpandConvTransLayer);
|
|
|
|
bool ExpandConvTransLayer::init(const LayerMap &layerMap,
|
|
const ParameterMap ¶meterMap) {
|
|
/* Initialize the basic convolutional parent class */
|
|
ConvBaseLayer::init(layerMap, parameterMap);
|
|
|
|
/* Initialize the projection */
|
|
for (auto &inputConfig : config_.inputs()) {
|
|
const ConvConfig &conf = inputConfig.conv_conf();
|
|
subM_.push_back(conf.channels() / conf.groups());
|
|
subN_.push_back(conf.output_x() * conf.output_x());
|
|
subK_.push_back(numFilters_ * conf.filter_size() * conf.filter_size() /
|
|
conf.groups());
|
|
/* Consistent caffe mode for multiple input */
|
|
caffeMode_ = conf.caffe_mode();
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Why this is necessary after calling init?
|
|
size_t ExpandConvTransLayer::getSize() {
|
|
CHECK_NE(inputLayers_.size(), 0UL);
|
|
imgSizeH_.clear();
|
|
imgSizeW_.clear();
|
|
outputH_.clear();
|
|
outputW_.clear();
|
|
subN_.clear();
|
|
size_t layerSize = 0;
|
|
for (size_t i = 0; i < inputLayers_.size(); i++) {
|
|
outputH_.push_back(inputLayers_[i]->getOutput().getFrameHeight());
|
|
outputW_.push_back(inputLayers_[i]->getOutput().getFrameWidth());
|
|
if (outputH_[i] == 0) outputH_[i] = outputX_[i];
|
|
if (outputW_[i] == 0) outputW_[i] = outputX_[i];
|
|
imgSizeH_.push_back(
|
|
imageSize(outputH_[i], filterSize_[i], padding_[i], stride_[i]));
|
|
imgSizeW_.push_back(
|
|
imageSize(outputW_[i], filterSize_[i], padding_[i], stride_[i]));
|
|
subN_.push_back(outputH_[i] * outputW_[i]);
|
|
CHECK(layerSize == 0 ||
|
|
imgSizeH_[i] * imgSizeW_[i] * (size_t)numFilters_ == layerSize);
|
|
layerSize = imgSizeH_[i] * imgSizeW_[i] * numFilters_;
|
|
}
|
|
getOutput().setFrameHeight(imgSizeH_[0]);
|
|
getOutput().setFrameWidth(imgSizeW_[0]);
|
|
return layerSize;
|
|
}
|
|
|
|
/**
 * Makes sure the scratch matrix expandInput_ exists with the requested shape.
 *
 * @param height number of rows requested for expandInput_.
 * @param width  number of columns requested for expandInput_.
 */
void ExpandConvTransLayer::resetExpandInput(size_t height, size_t width) {
  const size_t rows = height;
  const size_t cols = width;
  Matrix::resizeOrCreate(expandInput_, rows, cols, false, useGpu_);
}
|
|
|
|
/*void ExpandConvTransLayer::resetConvOutput(size_t batchSize, int inIdx) {
|
|
Matrix::resizeOrCreate(transOutValue_, batchSize * numFilters_, subN_[inIdx],
|
|
false, useGpu_);
|
|
}*/
|
|
|
|
|
|
|
|
/**
 * Expands one frame of `image` into the scratch buffer expandInput_.
 *
 * expandInput_ is first resized to (subK * groups) x subN for this input; the
 * frame is then wrapped in a temporary 1-row matrix and handed to convExpand
 * together with this input's filter size, stride, padding and output size.
 *
 * @param image    matrix whose row `startIdx` is the frame to expand.
 * @param startIdx row index of the frame inside `image`.
 * @param inIdx    index of the input whose geometry is used.
 */
void ExpandConvTransLayer::expandOneFrame(MatrixPtr image, size_t startIdx,
                                          int inIdx) {
  resetExpandInput(subK_[inIdx] * groups_[inIdx], subN_[inIdx]);

  // Locate the requested row and view it as a flat 1 x (H * W * C) matrix.
  real *frameBegin = image->getData() + startIdx * image->getWidth();
  const auto frameLen = imgSizeH_[inIdx] * imgSizeW_[inIdx] * channel_;
  MatrixPtr frame = Matrix::create(frameBegin, 1, frameLen, false, useGpu_);

  // The same filter size / stride / padding is used for both directions.
  const auto filter = filterSize_[inIdx];
  const auto stride = stride_[inIdx];
  const auto pad = padding_[inIdx];
  expandInput_->convExpand(*frame, imgSizeH_[inIdx], imgSizeW_[inIdx],
                           channel_, filter, filter, stride, stride, pad, pad,
                           outputH_[inIdx], outputW_[inIdx]);

  frame->clear();
}
|
|
|
|
/**
 * Back-propagates one sample of the output gradient to input `inIdx`.
 *
 * The sample's gradient frame is expanded into expandInput_ and then, group by
 * group, the weight block (created with the transpose flag set) is multiplied
 * with the matching block of expandInput_ and the product is added to the
 * previous layer's output gradient. Does nothing when the previous layer has
 * no output gradient.
 *
 * @param imageGrad gradient of this layer's output; row `startIdx` is used.
 * @param inIdx     index of the input to propagate to.
 * @param startIdx  index of the sample inside the batch.
 */
void ExpandConvTransLayer::expandBackOnce(MatrixPtr imageGrad, int inIdx,
                                          int startIdx) {
  int subM = subM_[inIdx];
  int subN = subN_[inIdx];
  int subK = subK_[inIdx];

  LayerPtr prevLayer = getPrev(inIdx);
  if (nullptr == prevLayer->getOutputGrad()) {
    return;
  }

  expandOneFrame(imageGrad, startIdx, inIdx);

  // One sample of the previous layer's gradient spans groups * subM * subN
  // elements -- exactly what the group loop below walks over. (numFilters_ is
  // a scalar everywhere else in this file and cannot be indexed per input.)
  const size_t sampleSize =
      static_cast<size_t>(groups_[inIdx]) * subM * subN;
  real *outGradData =
      prevLayer->getOutputGrad()->getData() + startIdx * sampleSize;

  real *wgtData = weights_[inIdx]->getW()->getData();
  real *expInData = expandInput_->getData();
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outGradData, subM, subN, false, useGpu_);
    // Presumably C = 1 * A * B + 1 * C, i.e. accumulate -- confirm with
    // Matrix::mul.
    C->mul(A, B, 1, 1);

    A->clear();
    B->clear();
    C->clear();

    // Advance every pointer to the next group's block.
    wgtData += subK * subM;
    expInData += subK * subN;
    outGradData += subM * subN;
  }
}
|
|
|
|
/**
 * Forward pass: sizes the output, runs shrinkFwd once per input, then applies
 * the bias (shared or unshared) if there is one, and finally the activation.
 *
 * @param passType forwarded to Layer::forward.
 */
void ExpandConvTransLayer::forward(PassType passType) {
  Layer::forward(passType);

  /* (Re)allocate output_ if necessary: batchSize rows of getSize() values. */
  int batchSize = inputLayers_[0]->getOutputValue()->getHeight();
  resetOutput(batchSize, getSize());

  /* Let every input contribute to the output. */
  const size_t numInputs = inputLayers_.size();
  for (size_t inputIdx = 0; inputIdx < numInputs; ++inputIdx) {
    MatrixPtr inputValue = getPrev(inputIdx)->getOutputValue();
    REGISTER_TIMER_INFO("shrinkFwd", getName().c_str());
    shrinkFwd(inputValue, inputIdx);
  }

  /* add the bias-vector */
  if (biases_) {
    if (!sharedBiases_) {
      addUnsharedBias();
    } else {
      addSharedBias();
    }
  }

  /* activation */
  forwardActivation();
}
|
|
|
|
/**
 * Forward computation for a single input.
 *
 * For each sample in `output`: per group, the weight block (subK x subM) is
 * multiplied with the sample's input block (subM x subN) into expandInput_;
 * the filled buffer is then folded into this layer's output frame through
 * convShrink.
 *
 * @param output value matrix of the input layer; one sample per row.
 * @param inpIdx index of the input being processed.
 */
void ExpandConvTransLayer::shrinkFwd(MatrixPtr output, int inpIdx) {
  const int subM = subM_[inpIdx];
  const int subN = subN_[inpIdx];
  const int subK = subK_[inpIdx];

  const size_t numSamples = output->getHeight();
  MatrixPtr layerOut = getOutputValue();

  /* make sure the scratch buffer has the right shape */
  resetExpandInput(subK * groups_[inpIdx], subN);

  // Element count of one output frame; constant for the whole batch.
  const auto frameLen = imgSizeH_[inpIdx] * imgSizeW_[inpIdx] * channel_;

  real *inPtr = output->getData();       // walks through the whole batch
  real *framePtr = layerOut->getData();  // one output frame per sample
  for (size_t sample = 0; sample < numSamples; ++sample) {
    // Weights and scratch buffer restart at their beginning for each sample.
    real *wgtPtr = weights_[inpIdx]->getW()->getData();
    real *expandPtr = expandInput_->getData();

    for (int g = 0; g < groups_[inpIdx]; ++g) {
      // temporary views on the current group's blocks
      MatrixPtr C = Matrix::create(expandPtr, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(inPtr, subM, subN, false, useGpu_);
      MatrixPtr A = Matrix::create(wgtPtr, subK, subM, false, useGpu_);
      C->mul(A, B);  // mul

      // release the temporary views
      A->clear();
      B->clear();
      C->clear();

      // step to the next group
      wgtPtr += subK * subM;
      inPtr += subM * subN;
      expandPtr += subK * subN;
    }

    // Fold the expanded buffer into this sample's output frame.
    MatrixPtr expanded = Matrix::create(
        expandInput_->getData(), subK * groups_[inpIdx], subN, false, useGpu_);
    MatrixPtr frame = Matrix::create(framePtr, 1, frameLen, false, useGpu_);
    // NOTE(review): the trailing 1.0f, 1.0f are presumably scale factors that
    // make convShrink add to the frame rather than overwrite it -- confirm.
    frame->convShrink(*expanded, imgSizeH_[inpIdx], imgSizeW_[inpIdx],
                      channel_, filterSize_[inpIdx], filterSize_[inpIdx],
                      stride_[inpIdx], stride_[inpIdx], padding_[inpIdx],
                      padding_[inpIdx], outputH_[inpIdx], outputW_[inpIdx],
                      1.0f, 1.0f);
    frame->clear();
    expanded->clear();

    // next output frame
    framePtr += frameLen;
  }
}
|
|
|
|
/**
 * Accumulates the gradient of a shared (per-channel) bias.
 *
 * `v` is viewed as a mapH x mapW matrix with mapW = getSize() / channel_ and
 * transposed into transOutValue_. The transposed copy is then reshaped so that
 * it has channel_ columns and reduced into `biases` with collectBias.
 *
 * The reshape and the reduction must operate on transOutValue_, the result of
 * the transpose. Using the untransposed view of `v` would discard the
 * transpose, so the columns would no longer line up with the channels.
 *
 * @param biases 1-row view of the bias gradient that receives the result.
 * @param v      gradient of this layer's output.
 */
void ExpandConvTransLayer::bpropSharedBias(MatrixPtr biases, MatrixPtr v) {
  size_t mapW = getSize() / channel_;
  size_t mapH = v->getElementCnt() / mapW;
  MatrixPtr vTmp = Matrix::create(v->getData(), mapH, mapW, false, useGpu_);

  Matrix::resizeOrCreate(transOutValue_, mapW, mapH, false, useGpu_);

  vTmp->transpose(transOutValue_, false);  // false means no memory allocation
  transOutValue_->reshape(transOutValue_->getElementCnt() / channel_,
                          channel_);
  biases->collectBias(*transOutValue_, 1.0f);
}
|
|
|
|
/**
 * Accumulates the bias gradient from the output gradient `v`.
 *
 * The bias gradient is wrapped in a temporary 1-row matrix. With shared biases
 * the work is delegated to bpropSharedBias; otherwise `v` is reduced directly
 * with collectBias.
 *
 * @param v gradient of this layer's output.
 */
void ExpandConvTransLayer::bpropBiases(MatrixPtr v) {
  // Flat 1 x N view on the bias gradient.
  MatrixPtr flatGrad =
      Matrix::create(biases_->getWGrad()->getData(), 1,
                     biases_->getWGrad()->getElementCnt(), false, useGpu_);

  if (!sharedBiases_) {
    flatGrad->collectBias(*v, 1.0f);
  } else {
    bpropSharedBias(flatGrad, v);
  }

  flatGrad->clear();
}
|
|
|
|
void ExpandConvTransLayer::backward(const UpdateCallback &callback) {
|
|
backwardActivation();
|
|
|
|
MatrixPtr imageGrad = getOutputGrad();
|
|
if (biases_ && biases_->getWGrad()) {
|
|
bpropBiases(imageGrad);
|
|
/* Increasing the number of gradient */
|
|
biases_->getParameterPtr()->incUpdate(callback);
|
|
}
|
|
|
|
for (size_t i = 0; i != inputLayers_.size(); ++i) {
|
|
/* First, calculate the input layers error */
|
|
for (size_t off = 0; off < imageGrad->getHeight(); off++) {
|
|
expandBackOnce(imageGrad, i, off);
|
|
}
|
|
if (weights_[i]->getWGrad()) {
|
|
/* Then, calculate the W-gradient for the current layer */
|
|
bpropWeights(imageGrad, i);
|
|
/* Increasing the number of gradient */
|
|
weights_[i]->getParameterPtr()->incUpdate(callback);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Accumulates the weight gradient of input `inpIdx`.
 *
 * For each sample, the sample's row of `v` is expanded into expandInput_ and,
 * group by group, the expanded block (subK x subN) is multiplied with the
 * input-value block (created as subM x subN with the transpose flag set) and
 * added to the matching subK x subM block of the weight gradient.
 *
 * @param v      gradient of this layer's output; one sample per row.
 * @param inpIdx index of the input whose weights are updated.
 */
void ExpandConvTransLayer::bpropWeights(MatrixPtr v, int inpIdx) {
  MatrixPtr wGrad = weights_[inpIdx]->getWGrad();
  MatrixPtr inputValue = getPrev(inpIdx)->getOutputValue();

  const int subM = subM_[inpIdx];
  const int subN = subN_[inpIdx];
  const int subK = subK_[inpIdx];
  const size_t numSamples = inputValue->getHeight();
  resetExpandInput(subK * groups_[inpIdx], subN);

  // Walks through the input values of the whole batch, never rewinds.
  real *inPtr = inputValue->getData();

  for (size_t sample = 0; sample < numSamples; ++sample) {  // frame by frame
    // expand this sample's gradient frame into expandInput_
    expandOneFrame(v, sample, inpIdx);

    // Gradient and scratch buffer restart at their beginning for each sample.
    real *wGradPtr = wGrad->getData();
    real *expandPtr = expandInput_->getData();

    // one product per group
    for (int g = 0; g < groups_[inpIdx]; ++g) {
      MatrixPtr A = Matrix::create(expandPtr, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(inPtr, subM, subN, true, useGpu_);
      MatrixPtr C = Matrix::create(wGradPtr, subK, subM, false, useGpu_);
      // Presumably C = 1 * A * B + 1 * C, i.e. accumulate -- confirm with
      // Matrix::mul.
      C->mul(A, B, 1, 1);

      A->clear();
      B->clear();
      C->clear();

      // step to the next group
      expandPtr += subK * subN;
      wGradPtr += subK * subM;
      inPtr += subM * subN;
    }
  }
}
|
|
|
|
|
|
} // namespace paddle
|