@@ -1304,68 +1304,6 @@ void GpuMatrix::maxSequenceBackward(Matrix& outputGrad,
  hl_max_sequence_backward(outGrad, maxIndex, inputGrad, numSequences, dim);
}

void GpuMatrix::contextProjectionForward(Matrix& input,
                                         Matrix* weight,
                                         const IVector& sequence,
                                         int contextLength,
                                         int contextStart,
                                         size_t beginPad,
                                         bool isPadding) {
  CHECK(dynamic_cast<GpuMatrix*>(&input));
  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
  if (weight) CHECK(dynamic_cast<GpuMatrix*>(weight));
  CHECK_EQ(getWidth(), input.getWidth() * contextLength);

  hl_context_projection_forward(input.getData(),
                                sequence.getData(),
                                isPadding ? weight->getData() : NULL,
                                getData(),
                                sequence.getSize() - 1,
                                input.getWidth(),
                                contextLength,
                                contextStart,
                                beginPad,
                                isPadding);
}
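
// A shape sketch with illustrative values (dim = 2, contextLength = 3,
// contextStart = -1; none of these come from the surrounding code). Each
// output row is the concatenation of a window of input rows:
//
//   out[t] = [ in[t-1] | in[t] | in[t+1] ]   // width = 3 * dim = 6
//
// This is why the CHECK_EQ above requires getWidth() == input.getWidth() *
// contextLength. Window positions that reach past a sequence boundary are
// filled from the learned `weight` rows when isPadding is true, and left
// unwritten otherwise.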

void GpuMatrix::contextProjectionBackwardData(Matrix& inputGrad,
                                              const IVector& sequence,
                                              int contextLength,
                                              int contextStart) {
  CHECK(dynamic_cast<GpuMatrix*>(&inputGrad));
  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
  CHECK_EQ(getWidth(), inputGrad.getWidth() * contextLength);

  hl_context_projection_backward_data(getData(),
                                      sequence.getData(),
                                      inputGrad.getData(),
                                      sequence.getSize() - 1,
                                      inputGrad.getWidth(),
                                      contextLength,
                                      contextStart);
}
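
// backwardData is the scatter counterpart of the forward gather: the j-th
// dim-wide column block of output-gradient row t (this matrix) is
// accumulated into inputGrad row t + contextStart + j, clipped at the
// sequence bounds. CpuMatrix::contextProjectionBackward below spells out
// the same index arithmetic on the CPU.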

void GpuMatrix::contextProjectionBackwardWeight(Matrix& weightGrad,
                                                const IVector& sequence,
                                                int contextLength,
                                                int contextStart,
                                                int totalPad,
                                                size_t beginPad) {
  CHECK(dynamic_cast<GpuMatrix*>(&weightGrad));
  CHECK(dynamic_cast<const GpuIVector*>(&sequence));
  CHECK_EQ(getWidth(), weightGrad.getWidth() * contextLength);

  hl_context_projection_backward_weight(getData(),
                                        sequence.getData(),
                                        weightGrad.getData(),
                                        sequence.getSize() - 1,
                                        weightGrad.getWidth(),
                                        totalPad,
                                        contextLength,
                                        contextStart,
                                        beginPad);
}
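
// backwardWeight covers only the padding rows: output-gradient blocks that
// the forward pass filled from `weight` (windows clipped at a sequence
// head or tail) are accumulated back into weightGrad. Judging from the CPU
// code below, totalPad is the total number of padding rows and the first
// beginPad of them serve the clipped sequence heads.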

void GpuMatrix::paramReluForward(Matrix& data, Matrix& W) {
  CHECK(data.useGpu_ == true && W.useGpu_ == true)
      << "Matrix type are not equal";
@@ -2203,113 +2141,6 @@ void CpuMatrix::maxSequenceBackward(Matrix& outputGrad,
  }
}

void CpuMatrix::contextProjectionForward(Matrix& input,
                                         Matrix* weight,
                                         const IVector& sequence,
                                         int contextLength,
                                         int contextStart,
                                         size_t beginPad,
                                         bool isPadding) {
  auto input_ptr = dynamic_cast<CpuMatrix*>(&input);
  auto seq_ptr = dynamic_cast<const CpuIVector*>(&sequence);
  CHECK(input_ptr && seq_ptr);
  if (weight) CHECK(dynamic_cast<CpuMatrix*>(weight));
  CHECK_EQ(getWidth(), input_ptr->getWidth() * contextLength);

  const int* starts = seq_ptr->getData();
  size_t numSequences = seq_ptr->getSize() - 1;
  for (size_t i = 0; i < numSequences; ++i) {
    for (int j = 0; j < contextLength; ++j) {
      int begin = starts[i] + contextStart + j;
      int end = starts[i + 1] + contextStart + j;
      int dstBegin = starts[i];
      int dstEnd = starts[i + 1];
      if (begin < starts[i]) {
        int64_t padSize =
            std::min(starts[i] - begin, starts[i + 1] - starts[i]);
        MatrixPtr mat = this->subMatrix(starts[i], padSize);
        if (isPadding) {
          MatrixPtr sub = weight->subMatrix(j, padSize);
          mat->addAtOffset(*sub, j * input_ptr->getWidth());
        }
        dstBegin = starts[i] + padSize;
        begin = starts[i];
      }
      if (end > starts[i + 1]) {
        int64_t padSize =
            std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
        MatrixPtr mat = this->subMatrix(starts[i + 1] - padSize, padSize);
        if (isPadding) {
          MatrixPtr sub =
              weight->subMatrix(beginPad + contextStart + j - padSize, padSize);
          mat->addAtOffset(*sub, j * input_ptr->getWidth());
        }
        dstEnd = starts[i + 1] - padSize;
        end = starts[i + 1];
      }
      if (end <= begin) continue;
      MatrixPtr src = input_ptr->subMatrix(begin, end - begin);
      MatrixPtr dst = this->subMatrix(dstBegin, dstEnd - dstBegin);
      dst->addAtOffset(*src, j * input_ptr->getWidth());
    }
  }
}
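
// A worked pass through the loop above, with illustrative values that do
// not come from any caller here: one sequence with starts = {0, 4},
// contextLength = 3, contextStart = -1, dim = input_ptr->getWidth().
//
//   j = 0: window is rows -1..2, clipped at the head, padSize = 1; if
//          isPadding, weight row 0 is added into output row 0, then input
//          rows 0..2 are added into output rows 1..3, all at offset 0.
//   j = 1: window is rows 0..3 and maps one-to-one at offset dim.
//   j = 2: window is rows 1..4, clipped at the tail, padSize = 1; if
//          isPadding, weight row beginPad + contextStart + j - padSize
//          (= beginPad) is added into output row 3, then input rows 1..3
//          are added into output rows 0..2, all at offset 2 * dim.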

void CpuMatrix::contextProjectionBackward(Matrix* inputGrad,
                                          Matrix* weightGrad,
                                          const IVector& sequence,
                                          int contextLength,
                                          int contextStart,
                                          size_t beginPad,
                                          bool isPadding) {
  if (inputGrad) CHECK(dynamic_cast<CpuMatrix*>(inputGrad));
  if (weightGrad) CHECK(dynamic_cast<CpuMatrix*>(weightGrad));
  CHECK(dynamic_cast<const CpuIVector*>(&sequence));

  int64_t inputDim = inputGrad ? inputGrad->getWidth()
                               : weightGrad ? weightGrad->getWidth() : 0;
  CHECK_EQ(getWidth(), inputDim * contextLength);

  const int* starts = sequence.getData();
  size_t numSequences = sequence.getSize() - 1;
  for (size_t i = 0; i < numSequences; ++i) {
    for (int j = 0; j < contextLength; ++j) {
      int begin = starts[i] + contextStart + j;
      int end = starts[i + 1] + contextStart + j;
      int dstBegin = starts[i];
      int dstEnd = starts[i + 1];
      if (begin < starts[i]) {
        int64_t padSize =
            std::min(starts[i] - begin, starts[i + 1] - starts[i]);
        if (isPadding && weightGrad) {
          MatrixPtr mat = this->subMatrix(starts[i], padSize);
          MatrixPtr sub = weightGrad->subMatrix(j, padSize);
          sub->addAtOffset(*mat, j * inputDim);
        }
        dstBegin = starts[i] + padSize;
        begin = starts[i];
      }
      if (end > starts[i + 1]) {
        int64_t padSize =
            std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
        if (isPadding && weightGrad) {
          MatrixPtr mat = this->subMatrix(starts[i + 1] - padSize, padSize);
          MatrixPtr sub = weightGrad->subMatrix(
              beginPad + contextStart + j - padSize, padSize);
          sub->addAtOffset(*mat, j * inputDim);
        }
        dstEnd = starts[i + 1] - padSize;
        end = starts[i + 1];
      }
      if (end <= begin) continue;
      if (!inputGrad) continue;
      MatrixPtr src = inputGrad->subMatrix(begin, end - begin);
      MatrixPtr dst = this->subMatrix(dstBegin, dstEnd - dstBegin);
      src->addAtOffset(*dst, j * inputDim);
    }
  }
}
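
// Mirror image of contextProjectionForward: identical window and clipping
// arithmetic, with gradients flowing the other way. Note that addAtOffset
// runs in both directions in this file: in the forward loop the wide
// output row accumulates a dim-wide block at column offset j * inputDim,
// while here the dim-wide inputGrad and weightGrad rows accumulate the
// block found at that offset in the wide output gradient.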

inline void vecAddTo(real* a, const real* b, size_t len) {
  for (unsigned int i = 0; i < len; ++i) {
    a[i] += b[i];
  }
}