@@ -47,43 +47,23 @@ bool ContextProjection::init() {
   int context_start = config_.context_start();
   bool is_padding = config_.trainable_padding();
   size_t total_pad = is_padding ? beginPad_ + endPad_ : 0;
 
-  if (!useGpu_) {  // CPU functions
-    createFunction(forward_,
-                   "ContextProjectionForward-CPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-    createFunction(backward_,
-                   "ContextProjectionBackward-CPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-  } else {  // GPU functions
-    createFunction(forward_,
-                   "ContextProjectionForward-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("is_padding", is_padding));
-    createFunction(backward_,
-                   "ContextProjectionBackwardData-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start));
-    createFunction(backward_,
-                   "ContextProjectionBackwardWeight-GPU",
-                   FuncConfig()
-                       .set("context_length", context_length)
-                       .set("context_start", context_start)
-                       .set("begin_pad", beginPad_)
-                       .set("total_pad", total_pad));
-  }
+  createFunction(forward_,
+                 "ContextProjectionForward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_)
+                     .set("is_padding", is_padding));
+
+  createFunction(backward_,
+                 "ContextProjectionBackward",
+                 FuncConfig()
+                     .set("context_length", context_length)
+                     .set("context_start", context_start)
+                     .set("begin_pad", beginPad_)
+                     .set("is_padding", is_padding)
+                     .set("total_pad", total_pad));
 
   return true;
 }
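Reviewer note (context, not part of the patch): this hunk can drop the `-CPU`/`-GPU` name suffixes because `createFunction` can resolve the device-specific kernel from `useGpu_` at registration time. Below is a minimal standalone sketch of that dispatch pattern, a guess at the mechanism rather than PaddlePaddle's actual registry; `FunctionBase` is the only name taken from the real API, and `functionRegistry`, `makeKey`, and `createByName` are hypothetical.

```cpp
// Illustrative sketch only (hypothetical names, not PaddlePaddle's real
// registry): one logical function name maps to two device kernels, and the
// device flag picks the kernel at creation time. This is the pattern that
// lets init() stop branching on useGpu_ at every call site.
#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct FunctionBase {
  virtual ~FunctionBase() = default;
};

using FunctionCreator = std::function<std::unique_ptr<FunctionBase>()>;

// Registry keyed by "<name>-CPU" or "<name>-GPU".
static std::map<std::string, FunctionCreator>& functionRegistry() {
  static std::map<std::string, FunctionCreator> registry;
  return registry;
}

static std::string makeKey(const std::string& name, bool useGpu) {
  return name + (useGpu ? "-GPU" : "-CPU");
}

// What a unified createFunction("ContextProjectionForward", ...) could do
// internally: look up the device-appropriate kernel instead of asking the
// caller to spell out the suffix.
std::unique_ptr<FunctionBase> createByName(const std::string& name,
                                           bool useGpu) {
  auto it = functionRegistry().find(makeKey(name, useGpu));
  if (it == functionRegistry().end()) {
    throw std::runtime_error("no kernel registered for " +
                             makeKey(name, useGpu));
  }
  return it->second();
}
```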
@@ -185,38 +165,16 @@ void ContextProjection::backward(const UpdateCallback& callback) {
   REGISTER_TIMER_INFO("ContextProjectionBackward", getName().c_str());
   bool is_padding = config_.trainable_padding();
   auto start_pos = in_->sequenceStartPositions;
-  if (!out_->grad->useGpu()) {
-    auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
-    backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
-                               Dims{batch_size, input_dim}),
-                        Tensor(w_ptr ? w_ptr->getData() : nullptr,
-                               Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
-                        Tensor(reinterpret_cast<real*>(
-                                   const_cast<int*>(start_pos->getData(useGpu_))),
-                               Dims{start_pos->getSize()})},
-                       {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-                       {});
-  } else {
-    if (in_->grad) {
-      backward_[0]->calc(
-          {Tensor(in_->grad->getData(), Dims{batch_size, input_dim}),
-           Tensor(reinterpret_cast<real*>(
-                      const_cast<int*>(start_pos->getData(useGpu_))),
-                  Dims{start_pos->getSize()})},
-          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-          {});
-    }
-    if (is_padding && weight_->getWGrad()) {
-      backward_[1]->calc(
-          {Tensor(weight_->getWGrad()->getData(),
-                  Dims{weight_->getWGrad()->getHeight(), input_dim}),
-           Tensor(reinterpret_cast<real*>(
-                      const_cast<int*>(start_pos->getData(useGpu_))),
-                  Dims{start_pos->getSize()})},
-          {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
-          {});
-    }
-  }
+  auto w_ptr = is_padding ? weight_->getWGrad() : nullptr;
+  backward_[0]->calc({Tensor(in_->grad ? in_->grad->getData() : nullptr,
+                             Dims{batch_size, input_dim}),
+                      Tensor(w_ptr ? w_ptr->getData() : nullptr,
+                             Dims{w_ptr ? w_ptr->getHeight() : 0, input_dim}),
+                      Tensor(reinterpret_cast<real*>(const_cast<int*>(
+                                 start_pos->getData(useGpu_))),
+                             Dims{start_pos->getSize()})},
+                     {Tensor(out_->grad->getData(), Dims{batch_size, dim})},
+                     {});
 
   if (config_.trainable_padding()) {
     weight_->getParameterPtr()->incUpdate(callback);
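Reviewer note (context, not part of the patch): the CPU/GPU branches collapse into one `backward_[0]->calc` call because absent gradients are passed as null-backed `Tensor` arguments instead of being handled by separate code paths. A minimal sketch of that nullptr-tolerant argument pattern follows; `TensorArg` and `contextProjectionBackwardSketch` are hypothetical stand-ins, not Paddle's `Tensor`/`Dims` types.

```cpp
// Sketch of the pattern the unified backward call relies on: a tensor
// wrapping a null pointer means "this gradient is not wanted", and the
// kernel guards each optional output, so one call site covers
// input-grad-only, padding-weight-grad-only, and both.
#include <cstddef>
#include <vector>

using real = float;

struct TensorArg {
  real* data;                    // nullptr: skip writing this gradient
  std::vector<std::size_t> dims;
};

void contextProjectionBackwardSketch(TensorArg in_grad,
                                     TensorArg w_grad,
                                     const TensorArg& out_grad) {
  if (in_grad.data != nullptr) {
    // scatter rows of out_grad back into the input gradient
  }
  if (w_grad.data != nullptr) {
    // accumulate rows that came from the trainable begin/end padding
  }
  (void)out_grad;  // read by both branches in a real kernel
}
```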