by going through Daoyuan's excellent paddle function design.avx_docs
parent
54a2b1f682
commit
838ef366dc
@ -0,0 +1,136 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "context_projection_op.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
#include "paddle/math/Vector.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
template <>
|
||||
void ContextProjectionForward<DEVICE_TYPE_CPU>(Tensor& output,
|
||||
const Tensor& input,
|
||||
const Tensor& weight,
|
||||
const Tensor& sequence,
|
||||
size_t context_length,
|
||||
int context_start,
|
||||
size_t begin_pad,
|
||||
bool is_padding) {
|
||||
CHECK(output.getData() && input.getData() && sequence.getData());
|
||||
CHECK_EQ(output.dims_.size(), 2);
|
||||
CHECK_EQ(input.dims_.size(), 2);
|
||||
CHECK_EQ(weight.dims_.size(), 2);
|
||||
CHECK_EQ(sequence.dims_.size(), 1);
|
||||
|
||||
auto out_mat = std::make_shared<CpuMatrix>(
|
||||
output.getData(), output.dims_[0], output.dims_[1]);
|
||||
const auto in_mat = std::make_shared<CpuMatrix>(
|
||||
input.getData(), input.dims_[0], input.dims_[1]);
|
||||
const auto weight_mat =
|
||||
!weight.getData()
|
||||
? nullptr
|
||||
: std::make_shared<CpuMatrix>(
|
||||
weight.getData(), weight.dims_[0], input.dims_[1]);
|
||||
CpuIVector seq_vec(sequence.dims_[0],
|
||||
reinterpret_cast<int*>(sequence.getData()));
|
||||
CHECK_EQ(out_mat->getWidth(), in_mat->getWidth() * context_length);
|
||||
|
||||
const int* starts = seq_vec.getData();
|
||||
const size_t num_sequences = seq_vec.getSize() - 1;
|
||||
for (size_t i = 0; i < num_sequences; ++i) {
|
||||
for (size_t j = 0; j < context_length; ++j) {
|
||||
int begin = starts[i] + context_start + j;
|
||||
int end = starts[i + 1] + context_start + j;
|
||||
int dst_begin = starts[i];
|
||||
int dst_end = starts[i + 1];
|
||||
if (begin < starts[i]) {
|
||||
int64_t pad_size =
|
||||
std::min(starts[i] - begin, starts[i + 1] - starts[i]);
|
||||
MatrixPtr mat = out_mat->subMatrix(starts[i], pad_size);
|
||||
if (is_padding && weight_mat) {
|
||||
MatrixPtr sub = weight_mat->subMatrix(j, pad_size);
|
||||
mat->addAtOffset(*sub, j * in_mat->getWidth());
|
||||
}
|
||||
dst_begin = starts[i] + pad_size;
|
||||
begin = starts[i];
|
||||
}
|
||||
if (end > starts[i + 1]) {
|
||||
int64_t pad_size =
|
||||
std::min(end - starts[i + 1], starts[i + 1] - starts[i]);
|
||||
MatrixPtr mat = out_mat->subMatrix(starts[i + 1] - pad_size, pad_size);
|
||||
if (is_padding && weight_mat) {
|
||||
MatrixPtr sub = weight_mat->subMatrix(
|
||||
begin_pad + context_start + j - pad_size, pad_size);
|
||||
mat->addAtOffset(*sub, j * in_mat->getWidth());
|
||||
}
|
||||
dst_end = starts[i + 1] - pad_size;
|
||||
end = starts[i + 1];
|
||||
}
|
||||
if (end <= begin) continue;
|
||||
MatrixPtr src = in_mat->subMatrix(begin, end - begin);
|
||||
MatrixPtr dst = out_mat->subMatrix(dst_begin, dst_end - dst_begin);
|
||||
dst->addAtOffset(*src, j * in_mat->getWidth());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \param inputs[0] input value.
|
||||
* \param inputs[1] input weight.
|
||||
* \param inputs[2] input sequence.
|
||||
* \param outputs[0] output value.
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
class ContextProjectionForwardFunc : public FunctionBase {
|
||||
public:
|
||||
void init(const FuncConfig& config) override {
|
||||
context_length_ = config.get<size_t>("context_length");
|
||||
context_start_ = config.get<int>("context_start");
|
||||
begin_pad_ = config.get<size_t>("begin_pad");
|
||||
is_padding_ = config.get<bool>("is_padding");
|
||||
}
|
||||
|
||||
void calc(const Arguments& inputs,
|
||||
const Arguments& outputs,
|
||||
const Arguments& inouts) override {
|
||||
CHECK_EQ(3, inputs.size());
|
||||
CHECK_EQ(1, outputs.size());
|
||||
CHECK_EQ(0, inouts.size());
|
||||
|
||||
ContextProjectionForward<Device>((Tensor&)outputs[0],
|
||||
inputs[0],
|
||||
inputs[1],
|
||||
inputs[2],
|
||||
context_length_,
|
||||
context_start_,
|
||||
begin_pad_,
|
||||
is_padding_);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t context_length_;
|
||||
int context_start_;
|
||||
size_t begin_pad_;
|
||||
bool is_padding_;
|
||||
};
|
||||
|
||||
REGISTER_TYPED_FUNC(ContextProjectionForward,
|
||||
CPU,
|
||||
ContextProjectionForwardFunc);
|
||||
#ifndef PADDLE_ONLY_CPU
|
||||
REGISTER_TYPED_FUNC(ContextProjectionForward,
|
||||
GPU,
|
||||
ContextProjectionForwardFunc);
|
||||
#endif
|
||||
} // namespace paddle
|
@ -0,0 +1,43 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Function.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
/**
|
||||
* \brief Context Projection Forward.
|
||||
*
|
||||
* \param[out] outputs output data.
|
||||
* \param[in] input input data.
|
||||
* \param[in] weight input weight.
|
||||
* \param[in] sequence input data.
|
||||
* \param[in] context_length consecutive rows for concatenation.
|
||||
* \param[in] begin_pad context start position.
|
||||
* \param[in] is_padding whether padding 0 or not.
|
||||
*
|
||||
*/
|
||||
template <DeviceType Device>
|
||||
void ContextProjectionForward(Tensor& output,
|
||||
const Tensor& input,
|
||||
const Tensor& weight,
|
||||
const Tensor& sequence,
|
||||
size_t context_length,
|
||||
int context_start,
|
||||
size_t begin_pad,
|
||||
bool is_padding);
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,137 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include "hl_base.h"
|
||||
#include "context_projection_op.h"
|
||||
|
||||
namespace paddle {
|
||||
|
||||
template <bool padding>
|
||||
__global__ void KeContextProjectionForward(const real* input,
|
||||
const int* sequence,
|
||||
const real* weight,
|
||||
real* output,
|
||||
int input_dim,
|
||||
int context_length,
|
||||
int context_start,
|
||||
int begin_pad) {
|
||||
int idx = threadIdx.x;
|
||||
int block_size = blockDim.x;
|
||||
int sequenceId = blockIdx.x;
|
||||
int seq_start = sequence[sequenceId];
|
||||
int seq_end = sequence[sequenceId+1];
|
||||
real value = 0;
|
||||
|
||||
int instances = seq_end - seq_start + context_length - 1;
|
||||
output += seq_start * input_dim * context_length;
|
||||
input += seq_start * input_dim;
|
||||
for (int k = 0; k <= input_dim / block_size; k++) {
|
||||
if (idx < input_dim) {
|
||||
for (int i = 0; i < instances; i++) {
|
||||
// i + context_start;
|
||||
if ((i + context_start) < 0) {
|
||||
if (padding) {
|
||||
value = weight[i * input_dim + idx];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else if ((i + context_start) >= (seq_end - seq_start)) {
|
||||
if (padding) {
|
||||
value =
|
||||
weight[(begin_pad + i + context_start - (seq_end - seq_start)) *
|
||||
input_dim + idx];
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
value = input[(i + context_start) * input_dim + idx];
|
||||
}
|
||||
|
||||
int outx = (i - context_length) < 0 ? i : (context_length - 1);
|
||||
int outy = (i - context_length) < 0 ? 0 : (i - (context_length - 1));
|
||||
real* output_r =
|
||||
output + outy * input_dim * context_length + outx * input_dim;
|
||||
for (int j = outy; j < seq_end - seq_start; j++) {
|
||||
output_r[idx] += value;
|
||||
if (j - outy == outx) break;
|
||||
output_r += (context_length - 1) * input_dim;
|
||||
}
|
||||
}
|
||||
}
|
||||
idx += block_size;
|
||||
}
|
||||
}
|
||||
|
||||
void hl_context_projection_forward(const real* input,
|
||||
const int* sequence,
|
||||
real* weight,
|
||||
real* output,
|
||||
int num_sequences,
|
||||
int input_dim,
|
||||
int context_length,
|
||||
int context_start,
|
||||
int begin_pad,
|
||||
bool is_padding) {
|
||||
CHECK_NOTNULL(input);
|
||||
CHECK_NOTNULL(sequence);
|
||||
CHECK_NOTNULL(output);
|
||||
CHECK(!is_padding || weight);
|
||||
|
||||
int block_size = 128;
|
||||
int blocks_x = num_sequences;
|
||||
int blocks_y = 1;
|
||||
dim3 threads(block_size, 1);
|
||||
dim3 grid(blocks_x, blocks_y);
|
||||
|
||||
if (is_padding) {
|
||||
KeContextProjectionForward<true><<< grid, threads, 0, STREAM_DEFAULT >>>
|
||||
(input, sequence, weight, output, input_dim,
|
||||
context_length, context_start, begin_pad);
|
||||
} else {
|
||||
KeContextProjectionForward<false><<< grid, threads, 0, STREAM_DEFAULT >>>
|
||||
(input, sequence, weight, output, input_dim,
|
||||
context_length, context_start, begin_pad);
|
||||
}
|
||||
CHECK_SYNC("hl_context_projection_forward failed");
|
||||
}
|
||||
|
||||
template <>
|
||||
void ContextProjectionForward<DEVICE_TYPE_GPU>(Tensor& output,
|
||||
const Tensor& input,
|
||||
const Tensor& weight,
|
||||
const Tensor& sequence,
|
||||
size_t context_length,
|
||||
int context_start,
|
||||
size_t begin_pad,
|
||||
bool is_padding) {
|
||||
CHECK(output.getData() && input.getData() && sequence.getData());
|
||||
CHECK_EQ(output.dims_.size(), 2);
|
||||
CHECK_EQ(input.dims_.size(), 2);
|
||||
CHECK_EQ(weight.dims_.size(), 2);
|
||||
CHECK_EQ(sequence.dims_.size(), 1);
|
||||
CHECK_EQ(output.dims_[1], input.dims_[1] * context_length);
|
||||
|
||||
hl_context_projection_forward(input.getData(),
|
||||
reinterpret_cast<int*>(sequence.getData()),
|
||||
weight.getData(),
|
||||
output.getData(),
|
||||
sequence.dims_[0] - 1,
|
||||
input.dims_[1],
|
||||
context_length,
|
||||
context_start,
|
||||
begin_pad,
|
||||
is_padding);
|
||||
}
|
||||
|
||||
} // namespace paddle
|
@ -0,0 +1,101 @@
|
||||
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. */
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "FunctionTest.h"
|
||||
#include "paddle/gserver/tests/TestUtil.h"
|
||||
#include "paddle/math/Matrix.h"
|
||||
|
||||
using namespace paddle; // NOLINT
|
||||
|
||||
void testMatrixProjectionForward(int context_start,
|
||||
size_t context_length,
|
||||
bool is_padding,
|
||||
size_t batch_size,
|
||||
size_t input_dim) {
|
||||
size_t pad = std::max(0, -context_start) +
|
||||
std::max(0, (int)(context_start + context_length - 1));
|
||||
if (pad == 0) is_padding = false;
|
||||
|
||||
FunctionCompare compare("ContextProjectionForward",
|
||||
FuncConfig()
|
||||
.set("context_length", context_length)
|
||||
.set("context_start", context_start)
|
||||
.set("begin_pad", std::max(0, -context_start))
|
||||
.set("is_padding", is_padding));
|
||||
|
||||
CpuMatrix cpu_in(batch_size, input_dim);
|
||||
cpu_in.randomizeUniform();
|
||||
GpuMatrix gpu_in(batch_size, input_dim);
|
||||
gpu_in.copyFrom(cpu_in);
|
||||
auto cpu_weight =
|
||||
is_padding ? std::make_shared<CpuMatrix>(pad, input_dim) : nullptr;
|
||||
auto gpu_weight =
|
||||
is_padding ? std::make_shared<GpuMatrix>(pad, input_dim) : nullptr;
|
||||
if (is_padding) {
|
||||
cpu_weight->randomizeUniform();
|
||||
gpu_weight->copyFrom(*cpu_weight);
|
||||
}
|
||||
IVectorPtr cpu_seq;
|
||||
generateSequenceStartPositions(batch_size, cpu_seq);
|
||||
IVectorPtr gpu_seq = IVector::create(cpu_seq->getSize(), true);
|
||||
gpu_seq->copyFrom(*cpu_seq);
|
||||
|
||||
CpuMatrix cpu_out(batch_size, input_dim * context_length);
|
||||
GpuMatrix gpu_out(batch_size, input_dim * context_length);
|
||||
cpu_out.randomizeUniform();
|
||||
gpu_out.copyFrom(cpu_out);
|
||||
|
||||
compare.getCpuFunction()->calc(
|
||||
{Tensor(cpu_in.getData(), Dims{batch_size, input_dim}),
|
||||
Tensor(cpu_weight ? cpu_weight->getData() : nullptr,
|
||||
Dims{pad, input_dim}),
|
||||
Tensor(reinterpret_cast<real*>(cpu_seq->getData()),
|
||||
Dims{cpu_seq->getSize()})},
|
||||
{Tensor(cpu_out.getData(), Dims{batch_size, input_dim * context_length})},
|
||||
{});
|
||||
compare.getGpuFunction()->calc(
|
||||
{Tensor(gpu_in.getData(), Dims{batch_size, input_dim}),
|
||||
Tensor(gpu_weight ? gpu_weight->getData() : nullptr,
|
||||
Dims{pad, input_dim}),
|
||||
Tensor(reinterpret_cast<real*>(gpu_seq->getData()),
|
||||
Dims{gpu_seq->getSize()})},
|
||||
{Tensor(gpu_out.getData(), Dims{batch_size, input_dim * context_length})},
|
||||
{});
|
||||
|
||||
autotest::TensorCheckEqual(cpu_out, gpu_out);
|
||||
}
|
||||
|
||||
TEST(ContextProjectionForward, projection) {
|
||||
for (auto context_start : {-5, -3, -1, 0, 3}) {
|
||||
for (auto context_length : {1, 2, 5, 7}) {
|
||||
for (auto trainable_padding : {false, true}) {
|
||||
for (auto batch_size : {1, 2, 5, 20, 100}) {
|
||||
for (auto input_dim : {15, 32, 63, 128, 200}) {
|
||||
VLOG(3) << " context_start=" << context_start
|
||||
<< " context_length=" << context_length
|
||||
<< " trainable_padding=" << trainable_padding
|
||||
<< " batch_size=" << batch_size
|
||||
<< " input_dim=" << input_dim;
|
||||
testMatrixProjectionForward(context_start,
|
||||
context_length,
|
||||
trainable_padding,
|
||||
batch_size,
|
||||
input_dim);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in new issue