@@ -215,12 +215,12 @@ void RecurrentLayer::forwardSequence(int batchSize,
 void RecurrentLayer::forwardOneSequence(int start, int length) {
   if (!reversed_) {
     if (prevOutput_) {
-      frameOutput_[start].value->mul(prevOutput_, weight_->getW(), 1, 1);
+      frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
     }
     activation_->forward(frameOutput_[start]);
     for (int i = 1; i < length; ++i) {
       frameOutput_[start + i].value->mul(
-          frameOutput_[start + i - 1].value, weight_->getW(), 1, 1);
+          *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
       activation_->forward(frameOutput_[start + i]);
     }
     if (prevOutput_) {
@@ -230,7 +230,7 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
     activation_->forward(frameOutput_[start + length - 1]);
     for (int i = length - 2; i >= 0; --i) {
       frameOutput_[start + i].value->mul(
-          frameOutput_[start + i + 1].value, weight_->getW(), 1, 1);
+          *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
       activation_->forward(frameOutput_[start + i]);
     }
   }
@@ -282,13 +282,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     for (int i = length - 1; i > 0; --i) {
       activation_->backward(frameOutput_[start + i]);
       frameOutput_[start + i - 1].grad->mul(
-          frameOutput_[start + i].grad, weightT, 1, 1);
+          *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
     activation_->backward(frameOutput_[start]);
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start, length - 1)->getTranspose(),
-          output_.grad->subMatrix(start + 1, length - 1),
+          *output_.value->subMatrix(start, length - 1)->getTranspose(),
+          *output_.grad->subMatrix(start + 1, length - 1),
           1,
           1);
     }
@@ -296,13 +296,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     for (int i = 0; i < length - 1; ++i) {
       activation_->backward(frameOutput_[start + i]);
       frameOutput_[start + i + 1].grad->mul(
-          frameOutput_[start + i].grad, weightT, 1, 1);
+          *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
     activation_->backward(frameOutput_[start + length - 1]);
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
-          output_.grad->subMatrix(start, length - 1),
+          *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
+          *output_.grad->subMatrix(start, length - 1),
           1,
           1);
     }
@@ -329,7 +329,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
     if (n != 0) {
       MatrixPtr batch1 =
           batchValue_->getBatchValue(n - 1, batch2->getHeight());
-      batch2->mul(batch1, weight_->getW(), 1, 1);
+      batch2->mul(*batch1, *weight_->getW(), 1, 1);
     }
     Argument arg;
     arg.value = batch2;
@@ -367,14 +367,14 @@ void RecurrentLayer::backwardBatch(int batchSize,
 
     if (n != 0) {
       batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
-      batch1->mul(batch2, weightT, 1, 1);
+      batch1->mul(*batch2, *weightT, 1, 1);
    }
 
     if (backwardByBatch && weight_->getWGrad()) {
       if (n != 0) {
         /* backward weight */
         batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight());
-        weight_->getWGrad()->mul(batch1->getTranspose(), batch2, 1, 1);
+        weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1);
       }
     }
   }
@@ -389,14 +389,14 @@ void RecurrentLayer::backwardBatch(int batchSize,
       int len = starts[seq + 1] - starts[seq];
       if (!reversed_) {
         weight_->getWGrad()->mul(
-            output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
-            output_.grad->subMatrix(starts[seq] + 1, len - 1),
+            *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
+            *output_.grad->subMatrix(starts[seq] + 1, len - 1),
             1,
             1);
       } else {
         weight_->getWGrad()->mul(
-            output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
-            output_.grad->subMatrix(starts[seq], len - 1),
+            *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
+            *output_.grad->subMatrix(starts[seq], len - 1),
             1,
             1);
       }
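
Every hunk above applies the same mechanical change: the Matrix::mul call that previously took MatrixPtr (shared-pointer) operands now takes Matrix references, so each call site dereferences its pointer arguments. Below is a minimal, self-contained sketch of that call-convention change; the Matrix stand-in type and its mul body are hypothetical, for illustration only, and are not PaddlePaddle's actual class.

#include <memory>

struct Matrix {
  // New-style signature: operands passed by reference, scale factors as before.
  void mul(const Matrix& a, const Matrix& b, float scaleAB, float scaleT) {
    // Computes this = scaleAB * (a * b) + scaleT * this; body elided here.
    (void)a; (void)b; (void)scaleAB; (void)scaleT;
  }
};
using MatrixPtr = std::shared_ptr<Matrix>;

int main() {
  MatrixPtr batch1 = std::make_shared<Matrix>();
  MatrixPtr batch2 = std::make_shared<Matrix>();
  // Old call (pointer operands):  batch2->mul(batch1, batch1, 1, 1);
  batch2->mul(*batch1, *batch1, 1, 1);  // new call: dereference the operands
  return 0;
}

Presumably the point of the reference-taking overload is that operands can no longer be null inside mul, and the non-null requirement is made explicit at each call site by the dereference.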