You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Paddle/paddle/gserver/layers/Layer.cpp

411 lines
13 KiB

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Util.h"
#include "CostLayer.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "ValidationLayer.h"
#endif
// Command-line flag, default false. When set, Layer::backwardActivation()
// logs a message for layers whose largest absolute output gradient exceeds
// the configured error clipping threshold.
DEFINE_bool(log_error_clipping, false, "enable log error clipping or not");
namespace paddle {
// Construct a layer from its configuration.
// Stores the config and the requested useGpu setting; the device id starts
// as CPU_DEVICE and needSequenceInfo_ starts as true. Inputs, parameters and
// the activation are not resolved here; that happens in init().
Layer::Layer(const LayerConfig& config, bool useGpu)
: config_(config),
useGpu_(useGpu),
deviceId_(CPU_DEVICE),
needSequenceInfo_(true) {}
/**
 * Resolve this layer's inputs, parameters and activation from the config.
 *
 * @param layerMap      lookup table used to find each configured input layer.
 * @param parameterMap  lookup table used to find input and bias parameters.
 * @return always true; every lookup failure aborts through CHECK instead.
 *
 * After this call parameters_ and inputArgument_ each hold exactly one entry
 * per configured input (a null parameter / empty string when not specified).
 */
bool Layer::init(const LayerMap& layerMap, const ParameterMap& parameterMap) {
  // Under parallel_nn on GPU, the device is taken from the layer config;
  // a negative device id switches this layer off the GPU.
  if (useGpu_ && FLAGS_parallel_nn) {
    deviceId_ = config_.device();
    if (deviceId_ < 0) {
      useGpu_ = false;
    }
  }
  output_.deviceId = deviceId_;

  for (auto& inputConfig : config_.inputs()) {
    // Hook up the producing layer and tell it which device consumes it.
    const std::string& inputName = inputConfig.input_layer_name();
    LayerPtr inputLayer;
    CHECK(mapGet(inputName, layerMap, &inputLayer))
        << "Cannot find input layer " << inputName << " for layer "
        << getName();
    this->addPrev(inputLayer);
    inputLayer->addOutputArgument(deviceId_);

    // Optional per-input parameter; stays null when none is configured.
    ParameterPtr parameter;
    if (inputConfig.has_input_parameter_name()) {
      CHECK(
          mapGet(inputConfig.input_parameter_name(), parameterMap, &parameter))
          << "Cannot find input parameter "
          << inputConfig.input_parameter_name() << " for layer " << getName();
      parameter->incShared();
      CHECK_EQ(parameter->getDeviceId(), getDeviceId());
    }
    parameters_.push_back(parameter);

    // Optional name of the input layer's argument; empty when unspecified.
    inputArgument_.push_back(inputConfig.has_input_layer_argument()
                                 ? inputConfig.input_layer_argument()
                                 : "");
  }

  if (config_.has_bias_parameter_name()) {
    CHECK(mapGet(config_.bias_parameter_name(), parameterMap, &biasParameter_))
        << "Cannot find bias parameter " << config_.bias_parameter_name()
        << " for layer " << getName();
    biasParameter_->incShared();
    CHECK_EQ(biasParameter_->getDeviceId(), getDeviceId());
  }

  // Build the activation function named in the configuration.
  activation_.reset(ActivationFunction::create(config_.active_type()));
  CHECK(activation_);

  initNeedFlags();
  markInBackward_.assign(inputLayers_.size(), false);
  return true;
}
// Definition of the static registrar that Layer::create() consults for every
// layer type it does not construct explicitly.
ClassRegistrar<Layer, LayerConfig> Layer::registrar_;
/**
 * Factory: build the layer described by `config`.
 *
 * A few type names contain '-', which is illegal for REGISTER_LAYER, and
 * cannot be renamed with '_' without breaking old training models, so they
 * are constructed explicitly here. Every other type goes through registrar_.
 */
LayerPtr Layer::create(const LayerConfig& config) {
  const std::string& type = config.type();

  if (type == "multi-class-cross-entropy") {
    return LayerPtr(new MultiClassCrossEntropy(config));
  }
  if (type == "rank-cost") {
    return LayerPtr(new RankingCost(config));
  }
#ifndef PADDLE_MOBILE_INFERENCE
  // Validation layers are compiled out of mobile inference builds.
  if (type == "auc-validation") {
    return LayerPtr(new AucValidation(config));
  }
  if (type == "pnpair-validation") {
    return LayerPtr(new PnpairValidation(config));
  }
#endif

  return LayerPtr(registrar_.createByType(type, config));
}
/**
 * Size (or allocate) the value and, when needed, the gradient matrix of
 * `output` to height x width on output.deviceId.
 *
 * @param output        argument whose matrices are resized.
 * @param height        number of rows.
 * @param width         number of columns.
 * @param isValueClean  zero the value matrix after resizing.
 * @param isGradClean   zero the gradient matrix after resizing.
 *
 * The gradient is only touched outside PASS_TEST and when this layer needs
 * a gradient.
 */
void Layer::resetSpecifyOutput(Argument& output,
                               size_t height,
                               size_t width,
                               bool isValueClean,
                               bool isGradClean) {
  SetDevice device(output.deviceId);

  Matrix::resizeOrCreate(
      output.value, height, width, /* trans */ false, useGpu(output.deviceId));
  if (isValueClean) {
    output.value->zeroMem();
  }

  // No gradient buffer is prepared when testing or when none is required.
  if (passType_ == PASS_TEST || !needGradient()) {
    return;
  }

  Matrix::resizeOrCreate(
      output.grad, height, width, /* trans */ false, useGpu(output.deviceId));
  if (isGradClean) {
    output.grad->zeroMem();
  }
}
// Resize the output (and every other-device copy) to height x width without
// zeroing either the value or the gradient.
void Layer::resizeOutput(size_t height, size_t width) {
  resetSpecifyOutput(output_, height, width, false, false);
  for (auto& other : outputOtherDevice_) {
    resetSpecifyOutput(other, height, width, false, false);
  }
}
// Resize the output (and every other-device copy) to height x width, leaving
// the value untouched but zeroing the gradient.
void Layer::reserveOutput(size_t height, size_t width) {
  resetSpecifyOutput(output_, height, width, false, true);
  for (auto& other : outputOtherDevice_) {
    resetSpecifyOutput(other, height, width, false, true);
  }
}
// Resize the output (and every other-device copy) to height x width and zero
// both the value and the gradient.
void Layer::resetOutput(size_t height, size_t width) {
  resetSpecifyOutput(output_, height, width, true, true);
  for (auto& other : outputOtherDevice_) {
    resetSpecifyOutput(other, height, width, true, true);
  }
}
void Layer::addOutputArgument(int deviceId) {
if (deviceId == deviceId_) {
output_.countIncrement();
return;
} else {
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
if (outputOtherDevice_[i].deviceId == deviceId) {
outputOtherDevice_[i].countIncrement();
return;
}
}
}
Argument argu;
argu.deviceId = deviceId;
outputOtherDevice_.push_back(argu);
outputOtherDevice_.back().countIncrement();
}
/**
 * Push this layer's output value and sequence metadata to every entry of
 * outputOtherDevice_, then signal that each copy's value is ready.
 */
void Layer::copyOutputToOtherDevice() {
  for (auto& target : outputOtherDevice_) {
    SetDevice device(target.deviceId);

    // For a CpuMatrix destination copyFrom is synchronous. For a GpuMatrix
    // destination it may be asynchronous, because all later computation
    // runs on HPPL_STREAM_DEFAULT as well.
    target.value->copyFrom(*getOutputValue(), HPPL_STREAM_DEFAULT);

    target.sequenceStartPositions = output_.sequenceStartPositions;
    target.subSequenceStartPositions = output_.subSequenceStartPositions;
    target.cpuSequenceDims = output_.cpuSequenceDims;

    target.notifyValueReady();
  }
}
// Block until the value of every input produced on a different device than
// this layer's is ready. Same-device inputs are not waited on.
void Layer::waitInputValue() {
  for (size_t idx = 0; idx != inputLayers_.size(); ++idx) {
    if (inputLayers_[idx]->getDeviceId() == deviceId_) {
      continue;
    }
    getInput(idx).waitValueReady();
  }
}
/**
 * Wait for the gradients held by every other-device output copy and fold
 * them into output_.grad.
 *
 * Does nothing when there is no output gradient or no other-device copy.
 * When output_.getAllCount() is zero, the first other-device gradient
 * overwrites output_.grad instead of being added to it (presumably because
 * no same-device consumer contributed a gradient -- confirm against
 * Argument); the remaining ones are accumulated through tmpGrad_.
 */
void Layer::waitAndMergeOutputGrad() {
  if (!output_.grad || outputOtherDevice_.empty()) {
    return;
  }

  for (auto& other : outputOtherDevice_) {
    other.waitGradReady();
  }

  /* merge output grad */
  size_t next = 0;
  if (!output_.getAllCount()) {
    output_.grad->copyFrom(*outputOtherDevice_[0].grad, HPPL_STREAM_1);
    hl_stream_synchronize(HPPL_STREAM_1);
    next = 1;
    if (outputOtherDevice_.size() == 1) {
      return;
    }
  }

  // Staging buffer shaped like output_.grad, on output_'s device.
  Matrix::resizeOrCreate(tmpGrad_,
                         output_.grad->getHeight(),
                         output_.grad->getWidth(),
                         /* trans */ false,
                         useGpu(output_.deviceId));

  while (next != outputOtherDevice_.size()) {
    tmpGrad_->copyFrom(*outputOtherDevice_[next].grad, HPPL_STREAM_1);
    hl_stream_synchronize(HPPL_STREAM_1);
    output_.grad->add(*tmpGrad_);
    ++next;
  }
}
// Notify gradient-ready for every input that markInputGrad() has not already
// notified, then clear all marks.
void Layer::markAllInputGrad() {
  const size_t inputCount = inputLayers_.size();
  for (size_t idx = 0; idx != inputCount; ++idx) {
    const bool alreadyNotified = markInBackward_[idx];
    if (!alreadyNotified) {
      inputLayers_[idx]->getOutput(deviceId_).notifyGradReady();
    }
    markInBackward_[idx] = false;
  }
}
// Notify that the gradient for input `inputIndex` is ready and record the
// mark so that markAllInputGrad() does not notify it a second time.
void Layer::markInputGrad(int inputIndex) {
  auto& producer = inputLayers_[inputIndex];
  producer->getOutput(deviceId_).notifyGradReady();
  markInBackward_[inputIndex] = true;
}
// Zero this layer's output gradient. The gradient matrix must already exist;
// a missing gradient is a fatal CHECK failure.
void Layer::zeroGrad() {
  // nullptr rather than NULL, matching the rest of this file.
  CHECK(output_.grad.get() != nullptr);
  output_.grad->zeroMem();
}
/**
 * Compute needGradient_.
 *
 * The flag becomes true when the bias parameter or any input parameter has
 * the PARAMETER_GRADIENT type, or when any input layer reports that it needs
 * a gradient; otherwise it is false.
 */
void Layer::initNeedFlags() {
  // Generic helper: sets `flag` if the bias, any parameter, or any input
  // layer (queried through the const member `flagQueryFunc`) requires `type`.
  // Every stage stops at the first hit.
  auto initFlag = [this](
      bool& flag, bool (Layer::*flagQueryFunc)() const, ParameterType type) {
    flag = biasParameter_ && biasParameter_->hasType(type);
    if (flag) {
      return;
    }

    // Entries may be null for inputs that carry no parameter.
    for (const auto& para : parameters_) {
      if (para && para->hasType(type)) {
        flag = true;
        return;
      }
    }

    for (const auto& layer : inputLayers_) {
      if ((layer.get()->*flagQueryFunc)()) {
        flag = true;
        return;  // one hit is enough, same as the parameter scan above
      }
    }
  };

  initFlag(needGradient_, &Layer::needGradient, PARAMETER_GRADIENT);
}
void Layer::showOutputStats() {
MatrixPtr out = getOutputValue();
if (!out) return;
if (!out->getElementCnt()) {
LOG(INFO) << "The number of output of " << config_.name()
<< " is 0, skip to show the statistics";
return;
}
MatrixPtr outSquare;
if (dynamic_cast<GpuSparseMatrix*>(out.get())) {
GpuSparseMatrix* tmp = dynamic_cast<GpuSparseMatrix*>(out.get());
outSquare = std::make_shared<CpuSparseMatrix>(tmp->getHeight(),
tmp->getWidth(),
tmp->getElementCnt(),
tmp->getValueType(),
tmp->getFormat());
} else {
outSquare = out->clone();
}
outSquare->copyFrom(*out, HPPL_STREAM_DEFAULT);
hl_stream_synchronize(HPPL_STREAM_DEFAULT);
real mean = outSquare->getSum() / out->getElementCnt();
real min;
real max;
if (dynamic_cast<CpuSparseMatrix*>(outSquare.get())) {
auto tmpMat = dynamic_cast<CpuSparseMatrix*>(outSquare.get());
min = tmpMat->getMin();
max = tmpMat->getMax();
tmpMat->square2();
LOG(INFO) << "show statistics of [none zero values] in sparse matrix";
} else {
min = outSquare->getMin();
max = outSquare->getMax();
outSquare->square2();
}
real std = (outSquare->getSum() / outSquare->getElementCnt()) - mean * mean;
std = std > 0 ? std : 0;
LOG(INFO) << "The output state of " << config_.name() << ": mean=" << mean
<< ", "
<< "std=" << std << ", "
<< "min=" << min << ", "
<< "max=" << max;
}
/**
 * Forward post-processing of output_: apply the activation, then dropout
 * when the configured drop rate is positive, then optionally log output
 * statistics (FLAGS_show_layer_stat).
 */
void Layer::forwardActivation() {
  /* activation */
  activation_->forward(output_).check();

  /* dropout */
  const bool dropoutEnabled = config_.drop_rate() > 0;
  if (dropoutEnabled) {
    forwardDropOut();
    // Softmax combined with dropout is rejected (fatal check).
    CHECK_NE(activation_->getName(), "softmax")
        << "Softmax activation cannot be used with Dropout";
  }

  if (FLAGS_show_layer_stat) {
    showOutputStats();
  }
}
void Layer::backwardActivation() {
/* Do error clipping */
if (config_.error_clipping_threshold() > 0.0f) {
if (FLAGS_log_error_clipping) {
VectorPtr outGradVec = Vector::create(
output_.grad->getData(), output_.grad->getElementCnt(), useGpu_);
real maxAbsGrad = outGradVec->getAbsMax();
if (maxAbsGrad > config_.error_clipping_threshold()) {
real avgAbsGrad = outGradVec->getAbsSum() / outGradVec->getSize();
LOG(INFO) << " layer=" << config_.name() << " need clipping,"
<< " max error=" << maxAbsGrad << " avg error=" << avgAbsGrad;
}
}
output_.grad->clip(-config_.error_clipping_threshold(),
config_.error_clipping_threshold());
}
/* Do dropout for delta*/
if (config_.drop_rate() > 0 && passType_ != PASS_TEST) {
MatrixPtr oGrad = getOutputGrad();
oGrad->dotMul(*oGrad, *dropOutMask_);
}
auto status = activation_->backward(output_);
status.check();
}
void Layer::forwardDropOut() {
auto& outV = getOutputValue();
if (passType_ == PASS_TRAIN) {
// new dropOutMask_ if dropOutMask_ is null ptr
Matrix::resizeOrCreate(dropOutMask_,
outV->getHeight(),
outV->getWidth(),
false,
useGpu(deviceId_));
dropOutMask_->randomizeUniform(); // generate a uniform random matrix
dropOutMask_->biggerThanScalar(config_.drop_rate()); // random mask
outV->dotMul(*outV, *dropOutMask_); // dropout
} else if (passType_ == PASS_GC) {
// only initialize once
if (!dropOutMask_) {
dropOutMask_ = Matrix::create(
outV->getHeight(), outV->getWidth(), false, useGpu(deviceId_));
// We use cpu matrix to generate mask so that the mask
// will be same for both gpu version and cpu version.
// This will help unittest to make sure they have same result.
MatrixPtr tmpMask = Matrix::create(outV->getHeight(), outV->getWidth());
tmpMask->randomizeUniform(); // generate a uniform random matrix
tmpMask->biggerThanScalar(config_.drop_rate()); // random mask
dropOutMask_->copyFrom(*tmpMask);
}
outV->dotMul(*outV, *dropOutMask_);
} else { // passType == PASS_TEST
outV->mulScalar(1.0 - config_.drop_rate());
}
}
} // namespace paddle