Feature/copytensor (#5455)
* "make global tensor function independently" * "replace functor" * "fix inline template error" * "fix tensor array with CopyFrom" * "fix other case use CopyFrom" * "move the op interface hardly" * "fix operators" * "fix typo" * "delete dynamic recurrent rnn and fix gru_unit in debugmode" * "fix unique_ptr copy" * "fix cuda copy" * "fix namespace error" * "removed nccl python test" * "fix include error" * "fix typo" * fix copy util testrelease/0.11.0
parent 748fdbbec5
commit 45062fe5d7
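In short, tensor copying now lives in the free functions CopyFrom, CopyFromVector and CopyToVector declared in paddle/framework/tensor_util.h, the operators that used the old copy interface are updated to call them, and the TensorArray and dynamic recurrent RNN sources shown below are removed. A rough sketch of the call-site migration (the previous member-style form is not visible in this diff, so the "before" line is indicative only):

    // before (indicative only): copy expressed as a Tensor member call
    dst_tensor.CopyFrom(src_tensor, dst_place, dev_ctx);
    // after: free function from paddle/framework/tensor_util.h
    CopyFrom(src_tensor, dst_place, dev_ctx, &dst_tensor);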
File diff suppressed because it is too large
@@ -1,132 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <vector>

#include "paddle/framework/lod_tensor.h"

namespace paddle {
namespace framework {

/*
 * DyBatchSeqPosition stores indices of the basic element in tensor. It is used
 * after lod-tensor's re-assembling, its info can be used to recover the order
 * in original lod-tensor.
 */
struct DySeqMeta {
  DySeqMeta(size_t begin, size_t end, size_t ori_idx)
      : begin(begin), end(end), ori_idx(ori_idx) {}

  size_t begin;
  size_t end;  // not included
  size_t ori_idx;
};

using DySeqMetaBatch = std::vector<DySeqMeta>;

/*
 * Extract the indices of instances.
 */
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);

/*
 * TensorArray is a C-array-like array of tensors, it is meant to be used with
 * dynamic iteration primitives such as while_loop. It is used to segment inputs
 * and store states in all time steps.
 *
 * By providing some methods similar to a C++ array, the definition of some
 * state-based dynamic models such as RNN could be more natural and highly
 * flexible.
 */
class TensorArray {
 public:
  using value_type = float;

  // max number of values allowed to store.
  const size_t MAX_SIZE{100000};

  /*
   * Read the value at location `index` in the `TensorArray`.
   */
  const LoDTensor &Read(size_t index) const;

  /*
   * Write value into the index of the TensorArray.
   */
  void Write(size_t index, const LoDTensor &value);

  /*
   * Write value into the index of the TensorArray, with memory shared.
   */
  void WriteShared(size_t index, const LoDTensor &value);

  /*
   * Recover the original LoD-arranged LoDTensor with the `values`, `level` and
   * `indice_map`.
   */
  LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
                 const LoD &lod) const;

  /*
   * Split LoDTensor in some `level` and write the generated batches to
   * `values`, if set `desend`, will sort by length in descending order else in
   * ascending order.
   */
  DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);

  /*
   * Pack an array of LoDTensors to a LoDTensor.
   */
  LoDTensor LodPack(size_t level) const;

  /*
   * Unpack a LoDTensor to an array of LoDTensors.
   */
  void LodUnpack(const LoDTensor &source, size_t level);

  /*
   * Pack the values into a tensor with rank one higher than each tensor in
   * values.
   */
  LoDTensor Stack() const;

  /*
   * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors.
   */
  void Unstack(const LoDTensor &source) const;

  /*
   * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors,
   * with memory of tensors shared.
   */
  void UnstackShared(const LoDTensor &source) const;

  /*
   * Return the number of values.
   */
  size_t size() const;

 protected:
  void Unstack(const LoDTensor &source, bool data_shared) const;

  LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur,
                       size_t level) const;

 private:
  mutable std::vector<LoDTensor> values_;
};  // class TensorArray

}  // namespace framework
}  // namespace paddle
@@ -1,182 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/tensor_array.h"

#include <gtest/gtest.h>

namespace paddle {
namespace framework {

class TensorArrayTester : public ::testing::Test {
 protected:
  void SetUp() override {
    LoDTensor source;
    source.Resize(make_ddim({batch_size, dim}));
    int* data = source.mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < 16 * 32; i++) {
      data[i] = i;
    }
    ta.Unstack(source);
  }

  TensorArray ta;
  const int batch_size = 16;
  const int dim = 32;
};

TEST_F(TensorArrayTester, Read) {
  for (int i = 0; i < batch_size; i++) {
    const auto& tensor = ta.Read(i);
    ASSERT_EQ(tensor.dims()[0], 1);
    ASSERT_EQ(tensor.dims()[1], dim);
  }
}

TEST_F(TensorArrayTester, Write) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  for (int i = 0; i < dim; i++) {
    *(source.mutable_data<int>(platform::CPUPlace()) + i) = i;
  }

  ta.Write(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }
}

TEST_F(TensorArrayTester, WriteShared) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  for (int i = 0; i < dim; i++) {
    *(source.mutable_data<int>(platform::CPUPlace()) + i) = i;
  }

  ta.WriteShared(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }

  EXPECT_EQ(source.data<int>(), tensor.data<int>());
}

class TensorArrayPackTester : public ::testing::Test {
 protected:
  virtual void SetUp() override {
    lod.push_back(std::vector<size_t>{0, 2, 9, 13});

    source.set_lod(lod);
    source.Resize(make_ddim({13, 128}));
    source.mutable_data<int>(platform::CPUPlace());

    // content of each sentence: 0 1 2 3 4
    const auto& level = lod.front();
    for (size_t i = 0; i < level.size() - 1; i++) {
      size_t begin = level[i];
      size_t end = level[i + 1];
      for (size_t j = begin; j < end; j++) {
        auto record = source.Slice(j, j + 1);
        for (int dim = 0; dim < 128; dim++) {
          record.mutable_data<int>(platform::CPUPlace())[dim] = j - begin;
        }
      }
    }

    // unpack
    meta = ta.Unpack(source, 0, true);
  }

  LoD lod;
  TensorArray ta;
  LoDTensor source;
  std::vector<DySeqMeta> meta;
};

TEST_F(TensorArrayPackTester, Unpack) {
  ASSERT_EQ(ta.size(), 7UL);

  const auto& t0 = ta.Read(0);
  const auto& t1 = ta.Read(1);

  ASSERT_EQ(t0.data<int>()[0], int(0));
  ASSERT_EQ(t1.data<int>()[0], int(1));
}

TEST_F(TensorArrayPackTester, Pack) {
  LoDTensor packed = ta.Pack(0, meta, lod);
}

TEST_F(TensorArrayTester, size) {
  ASSERT_EQ(ta.size(), static_cast<size_t>(batch_size));
}

TEST(TensorArray, LodPack) {
  // three time steps, each step stores a LoDTensor
  // - [0] [1]
  // - [2 3], [4 5]
  // - [6 7] [] [8], [9, 10]
  // try to get a LoDTensor with content:
  // - [0 2 6]
  // - [0 2 7]
  // - [0 3]
  // - [1 4 8]
  // - [1 5 9]
  // - [1 5 10]
  std::array<LoDTensor, 3> tensors;
  tensors[0].Resize(make_ddim({2, 1}));
  tensors[1].Resize(make_ddim({4, 1}));
  tensors[2].Resize(make_ddim({5, 1}));
  int index = 0;
  for (auto& t : tensors) {
    t.mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < t.dims()[0]; i++) {
      t.data<int>()[i] = index;
      index++;
    }
  }

  std::array<LoD, 3> lods;
  std::vector<std::vector<size_t>> levels{
      {0, 1, 2}, {0, 2, 4}, {0, 2, 2, 3, 5}};
  for (int i = 0; i < 3; i++) {
    lods[i].emplace_back(levels[i].begin(), levels[i].end());
  }

  TensorArray ta;
  for (int i = 0; i < 3; i++) {
    tensors[i].set_lod(lods[i]);
    ta.Write(i, tensors[i]);
  }

  auto merged = ta.LodPack(0);

  std::vector<int> target_tensor_data{{0, 2, 6,  // 0
                                       0, 2, 7,  // 1
                                       0, 3,     // 2
                                       1, 4, 8,  // 3
                                       1, 5, 9,  // 5
                                       1, 5, 10}};
  EXPECT_EQ(merged.dims()[0], (int)target_tensor_data.size());
  for (size_t i = 0; i < target_tensor_data.size(); i++) {
    EXPECT_EQ(target_tensor_data[i], merged.data<int>()[i]);
  }
}

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,153 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/tensor.h"

namespace paddle {
namespace framework {

/**
 * @brief   Copy the content of external tensor to a new place.
 *
 * @param[in] src        The external tensor.
 * @param[in] dst_place  The dst place.
 * @param[in] ctx        The device context contains device resources.
 *
 * @note    CopyFrom supports CPU <-> GPU, GPU <-> GPU.
 */

inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
                     const platform::DeviceContext& ctx, Tensor* dst) {
  src.check_memory_size();

  dst->Resize(src.dims());
  auto src_place = src.place();
  auto src_ptr = src.data<void>();

  auto dst_ptr = dst->mutable_data(dst_place, src.type());

  auto size = src.numel() * SizeOfType(src.type());

  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  } else if (platform::is_cpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  } else if (platform::is_gpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
}

/**
 * @brief   Copy the content of an external vector to a tensor.
 *
 * @param[in] src    The external vector.
 * @param[in] ctx    The device context contains device resources.
 *
 * @note    CopyFromVector resizes dst to a 1-D tensor with src.size()
 *          elements before copying.
 */
template <typename T>
inline void CopyFromVector(const std::vector<T>& src,
                           const platform::DeviceContext& ctx, Tensor* dst) {
  auto dst_place = ctx.GetPlace();
  auto src_ptr = static_cast<const void*>(src.data());
  platform::CPUPlace src_place;
  dst->Resize({static_cast<int64_t>(src.size())});
  auto dst_ptr = static_cast<void*>(dst->mutable_data<T>(dst_place));
  auto size = src.size() * sizeof(T);

  if (platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, src_place,
                 src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(dst_place)) {  // NOLINT
    memory::Copy(
        boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place, src_ptr,
        size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
}

/**
 * @brief   Copy the content of a tensor to a vector.
 *
 * @param[in] src    The source tensor.
 * @param[in] ctx    The device context contains device resources.
 *
 * @note    CopyToVector resizes dst to src.numel() elements before copying.
 */
template <typename T>
inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx,
                         std::vector<T>* dst) {
  auto src_ptr = static_cast<const void*>(src.data<T>());
  auto size = src.numel() * sizeof(T);

  platform::CPUPlace dst_place;
  dst->resize(src.numel());
  auto dst_ptr = static_cast<void*>(dst->data());

  if (platform::is_cpu_place(src.place())) {
    memory::Copy(dst_place, dst_ptr, boost::get<platform::CPUPlace>(src.place()),
                 src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src.place())) {  // NOLINT
    memory::Copy(
        dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()), src_ptr,
        size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif

}

}  // namespace framework
}  // namespace paddle
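As a quick reference, a minimal CPU-only sketch of how the three helpers above compose (the tensor names are illustrative; the calls mirror the unit tests in the next file):

    // Sketch: round-trip a std::vector through Tensors with the new helpers.
    std::vector<int> src_vec = {1, 2, 3, 4};
    Tensor t1, t2;
    platform::CPUPlace cpu_place;
    platform::CPUDeviceContext cpu_ctx(cpu_place);

    CopyFromVector<int>(src_vec, cpu_ctx, &t1);  // vector -> 1-D tensor of 4 elements
    CopyFrom(t1, cpu_place, cpu_ctx, &t2);       // tensor -> tensor at the given place
    std::vector<int> out;
    CopyToVector<int>(t2, cpu_ctx, &out);        // tensor -> vector, out resized to 4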
@@ -0,0 +1,228 @@
/*
  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
      http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
*/

#include "paddle/framework/tensor_util.h"
#include <gtest/gtest.h>
#include <string>

namespace paddle {
namespace framework {
TEST(CopyFrom, Tensor) {
  Tensor src_tensor;
  Tensor dst_tensor;
  platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));

  int* src_ptr =
      src_tensor.mutable_data<int>(make_ddim({3, 3}), platform::CPUPlace());

  int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  memcpy(src_ptr, arr, 9 * sizeof(int));

  auto cpu_place = new platform::CPUPlace();
  CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor);

  const int* dst_ptr = dst_tensor.data<int>();
  ASSERT_NE(src_ptr, dst_ptr);
  for (size_t i = 0; i < 9; ++i) {
    EXPECT_EQ(src_ptr[i], dst_ptr[i]);
  }

  Tensor slice_tensor = src_tensor.Slice(1, 2);
  CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor);
  const int* slice_ptr = slice_tensor.data<int>();
  dst_ptr = dst_tensor.data<int>();
  ASSERT_NE(dst_ptr, slice_ptr);
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
  }
#ifdef PADDLE_WITH_CUDA
  {
    Tensor src_tensor;
    Tensor gpu_tensor;
    Tensor dst_tensor;

    int* src_ptr =
        src_tensor.mutable_data<int>(make_ddim({3, 3}), platform::CPUPlace());

    int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    memcpy(src_ptr, arr, 9 * sizeof(int));

    // CPU Tensor to GPU Tensor
    auto gpu_place = new platform::GPUPlace(0);
    platform::CUDADeviceContext gpu_ctx(*gpu_place);
    CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);

    // GPU Tensor to CPU Tensor
    auto cpu_place = new platform::CPUPlace();
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    const int* dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    Tensor slice_tensor = src_tensor.Slice(1, 2);

    // CPU Slice Tensor to GPU Tensor
    CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);

    // GPU Tensor to CPU Tensor
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Slice Tensors
    gpu_ctx.Wait();
    const int* slice_ptr = slice_tensor.data<int>();
    dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(dst_ptr, slice_ptr);
    for (size_t i = 0; i < 3; ++i) {
      EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
    }
  }
#endif
}

TEST(CopyFromVector, Tensor) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor cpu_tensor;

    // Copy to CPU Tensor
    cpu_tensor.Resize(make_ddim({3, 3}));
    auto cpu_place = new paddle::platform::CPUPlace();
    CPUDeviceContext cpu_ctx(*cpu_place);
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);

    // Compare Tensors
    const int* cpu_ptr = cpu_tensor.data<int>();
    const int* src_ptr = src_vec.data();
    ASSERT_NE(src_ptr, cpu_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
    }

    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
    cpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
    cpu_ptr = cpu_tensor.data<int>();
    src_ptr = src_vec.data();
    ASSERT_NE(src_ptr, cpu_ptr);
    for (size_t i = 0; i < 5; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
    }

    delete cpu_place;
  }

#ifdef PADDLE_WITH_CUDA
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor cpu_tensor;
    Tensor gpu_tensor;
    Tensor dst_tensor;

    // Copy to CPU Tensor
    cpu_tensor.Resize(make_ddim({3, 3}));
    auto cpu_place = new paddle::platform::CPUPlace();
    CPUDeviceContext cpu_ctx(*cpu_place);
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);

    // Copy to GPUTensor
    gpu_tensor.Resize(make_ddim({3, 3}));
    auto gpu_place = new paddle::platform::GPUPlace();
    CUDADeviceContext gpu_ctx(*gpu_place);
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
    // Copy from GPU to CPU tensor for comparison
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    const int* src_ptr = src_vec.data();
    const int* cpu_ptr = cpu_tensor.data<int>();
    const int* dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, cpu_ptr);
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);

    cpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
    gpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    src_ptr = src_vec.data();
    cpu_ptr = cpu_tensor.data<int>();
    dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, cpu_ptr);
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 5; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    delete cpu_place;
    delete gpu_place;
  }
#endif
}

TEST(CopyToVector, Tensor) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  {
    Tensor src;
    int* src_ptr = src.mutable_data<int>({3, 3}, CPUPlace());
    for (int i = 0; i < 3 * 3; ++i) {
      src_ptr[i] = i;
    }

    CPUPlace place;
    CPUDeviceContext cpu_ctx(place);
    std::vector<int> dst;
    CopyToVector<int>(src, cpu_ctx, &dst);

    for (int i = 0; i < 3 * 3; ++i) {
      EXPECT_EQ(src_ptr[i], dst[i]);
    }
  }
#ifdef PADDLE_WITH_CUDA
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor gpu_tensor;
    GPUPlace place;
    CUDADeviceContext gpu_ctx(place);
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);

    std::vector<int> dst;
    CopyToVector<int>(gpu_tensor, gpu_ctx, &dst);

    for (int i = 0; i < 3 * 3; ++i) {
      EXPECT_EQ(src_vec[i], dst[i]);
    }
  }
#endif
}

}  // namespace framework
}  // namespace paddle
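One usage note the "Sync before Compare Tensors" steps above make explicit: the GPU copies are issued asynchronously on the device context's stream, so the host should call Wait() on the context before reading the destination. A condensed sketch of that pattern (GPU 0 assumed available; names are illustrative):

    #ifdef PADDLE_WITH_CUDA
    std::vector<int> src_vec = {1, 2, 3};
    Tensor gpu_tensor;
    platform::GPUPlace gpu_place(0);
    platform::CUDADeviceContext gpu_ctx(gpu_place);

    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);  // host vector -> GPU tensor
    std::vector<int> dst;
    CopyToVector<int>(gpu_tensor, gpu_ctx, &dst);        // GPU tensor -> host vector
    gpu_ctx.Wait();  // synchronize before dst is used on the host
    #endif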
File diff suppressed because it is too large
@@ -1,233 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#ifdef PADDLE_WITH_TESTING
#include "gtest/gtest.h"
#endif

#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/framework/variable.h"
#include "paddle/operators/rnn/recurrent_op_utils.h"

namespace paddle {
namespace operators {

class RNNAlgorithm {
 public:
  enum ComputeMode { kForward = 0, kBackward = 1 };
  static const std::array<rnn::ArgumentName, 2> kArgNames;
  using value_type = float;

  /*
   * Different `Run` method for forward and backward, `_` is just for template
   * specialization.
   */
  template <ComputeMode _>
  void Run(const framework::Scope& scope, const framework::OperatorBase& op,
           const platform::DeviceContext& dev_ctx);
  /*
   * Split the inputs(LoDTensors) to segments for each time step.
   */
  void SplitInputs();

  /*
   * Create step-scopes to store temporary outputs in each time steps.
   */
  void CreateScopes();

  /*
   * Link TensorArray steps to the corresponding variables located in
   * step-scopes.
   */
  void WriteStepInputs();

  /*
   * Write output of each step to the corresponding TensorArray.
   */
  void WriteStepOutputs();

  /*
   * Initialize the states, each state will have a corresponding pre-state,
   * which share the memory with the state in the previous time state. The
   * pre-state in the first time step will be initialized with a zero tensor or
   * a tensor in parent scope if provided.
   */
  void InitStates();

  /*
   * Create state variables for each time step.
   */
  void CreateState(const rnn::StateAttr& state, size_t step);

  /*
   * Link pre-state variable in current scope to the state variable in the
   * previous time step (scope) by reference.
   */
  void LinkState(const rnn::StateAttr& state, size_t step);

  /*
   * Link the pre-state of the first time step to the `boot-state` in parent's
   * scope.
   */
  void LinkInitialState(const rnn::StateAttr& state);

  /*
   * Copy the gradient from `pre-state` in the first step-scope to the
   * `boot-state` in parent's scope.
   */
  void ExportInitialStateGradient(const rnn::StateAttr& state);

  /*
   * Calculate time steps.
   */
  void RunSteps();

  /*
   * Concatenate outputs in each time step and generate a LoDTensor.
   */
  void ConcatOutputs();

  void SetComputeMode(ComputeMode mode) { mode_ = mode; }
  bool IsForward() const { return mode_ == ComputeMode::kForward; }
  bool IsBackward() const { return mode_ == ComputeMode::kBackward; }

  /*
   * Set a step unit that is created according to a RecurrentOp's step unit.
   */
  void SetStepUnit(std::unique_ptr<framework::OperatorBase> step_unit) {
    PADDLE_ENFORCE_NOT_NULL(step_unit);
    step_unit_ = std::move(step_unit);
  }
  const framework::OperatorBase& GetStepUnit() const { return *step_unit_; }

  const framework::TensorArray& state(const std::string& name) const {
    auto it = states_.find(name);
    PADDLE_ENFORCE(it != states_.end());
    return it->second;
  }
  const framework::TensorArray& step_input(const std::string& name) const {
    auto it = step_inputs_.find(name);
    PADDLE_ENFORCE(it != step_inputs_.end());
    return it->second;
  }
  const framework::TensorArray& step_output(const std::string& name) const {
    auto it = step_outputs_.find(name);
    PADDLE_ENFORCE(it != step_outputs_.end());
    return it->second;
  }

 protected:
  struct ArgCache {
    framework::Scope const* scope;
    std::vector<framework::Scope*>* scopes;
    std::map<std::string, framework::Variable*> inputs;
    std::map<std::string, framework::Variable*> outputs;
    platform::DeviceContext const* dev_ctx;

    size_t num_steps{0};

    void Init(const rnn::ArgumentName& name, const framework::OperatorBase& op,
              const framework::Scope& scope,
              platform::DeviceContext const* dev_ctx, rnn::Argument* arg);

    framework::Scope& GetScope(size_t index) {
      PADDLE_ENFORCE_LT(index, num_steps);
      return *scopes->at(index);
    }

    framework::LoDTensor* GetTensor(const framework::Scope& scope,
                                    const std::string& name);

   private:
    void InitArgument(const rnn::ArgumentName& name,
                      const framework::OperatorBase& op, rnn::Argument* arg);
    void CacheScopes(const framework::Scope& scope, const rnn::Argument& arg);
    void CacheInlinks(const framework::Scope& scope,
                      const std::vector<std::string>& names);
    void CacheOutlinks(const framework::Scope& scope,
                       const std::vector<std::string>& names);
    framework::Variable* GetVariable(const framework::Scope& scope,
                                     const std::string& name);
  };

 private:
  std::unique_ptr<framework::OperatorBase> step_unit_;
  std::map<std::string, framework::TensorArray> states_;
  std::map<std::string, framework::TensorArray> step_inputs_;
  std::map<std::string, framework::TensorArray> step_outputs_;
  std::map<std::string, std::vector<framework::DySeqMeta>> dy_seq_metas_;
  rnn::Argument arg_;
  ArgCache cache_;
  ComputeMode mode_{ComputeMode::kForward};

#ifdef PADDLE_WITH_TESTING
  // test forward
  friend class RNNAlgorithmTestHelper;
  FRIEND_TEST(RNNAlgorithmTestHelper, SplitInputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, CreateCache);
  FRIEND_TEST(RNNAlgorithmTestHelper, CreateScopes);
  FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepInputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepOutputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, InitStates);
  FRIEND_TEST(RNNAlgorithmTestHelper, ConcatOutputs);
  // TODO(superjom) test backward
#endif
};

class DynamicRecurrentOp : public framework::OperatorBase {
 public:
  DynamicRecurrentOp(const std::string& type,
                     const framework::VariableNameMap& inputs,
                     const framework::VariableNameMap& outputs,
                     const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  DynamicRecurrentOp(const DynamicRecurrentOp& o)
      : framework::OperatorBase(
            static_cast<const framework::OperatorBase&>(o)) {
    PADDLE_THROW("Not implemented");
  }

  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override;

  mutable RNNAlgorithm rnn;
};

class DynamicRecurrentGradientOp : public framework::OperatorBase {
 public:
  DynamicRecurrentGradientOp(const std::string& type,
                             const framework::VariableNameMap& inputs,
                             const framework::VariableNameMap& outputs,
                             const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  DynamicRecurrentGradientOp(const DynamicRecurrentGradientOp& o)
      : framework::OperatorBase(
            static_cast<const framework::OperatorBase&>(o)) {
    PADDLE_THROW("Not implemented");
  }

  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override;

  mutable RNNAlgorithm rnn;
};

}  // namespace operators
}  // namespace paddle
@@ -1,217 +0,0 @@
#include "paddle/operators/dynamic_recurrent_op.h"

#include <gtest/gtest.h>

#include "paddle/framework/ddim.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"

namespace paddle {
namespace operators {

using framework::Scope;
using framework::TensorArray;
using framework::LoDTensor;
using framework::Variable;

class TestOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;
  DEFINE_OP_CLONE_METHOD(TestOp);
  void Run(const Scope& scope,
           const platform::DeviceContext& dev_ctx) const override {}
};

void OpDescNewVar(const std::string& param_name,
                  std::initializer_list<const char*> arguments,
                  paddle::framework::OpDesc::Var* var) {
  var->set_parameter(param_name);
  for (auto& arg_name : arguments) {
    var->add_arguments(arg_name);
  }
}

// create a LoD tensor in scope with specific dims
LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
                     const platform::Place& place) {
  auto* var = scope.Var(name);
  auto* tensor = var->GetMutable<LoDTensor>();
  tensor->Resize(dims);
  tensor->mutable_data<float>(place);
  return tensor;
}

class RNNAlgorithmTestHelper : public ::testing::Test {
 protected:
  const rnn::ArgumentName argname = RNNAlgorithm::kArgNames[0];

  virtual void SetUp() override {
    CreateGlobalVariables();

    auto op_desc = CreateOpDesc();
    op = paddle::framework::OpRegistry::CreateOp(op_desc);
    dop = &(dynamic_cast<DynamicRecurrentOp*>(op.get())->rnn);
    InitCacheManually();
    InitStepNet();
  }

  framework::OpDesc CreateOpDesc() {
    // create op
    paddle::framework::OpDesc op_desc;
    op_desc.set_type("dynamic_recurrent");

    OpDescNewVar(argname.inlinks, {"in0"}, op_desc.add_inputs());
    OpDescNewVar(argname.initial_states, {"boot_mem"}, op_desc.add_inputs());
    OpDescNewVar(argname.step_scopes, {"step_scopes"}, op_desc.add_outputs());
    OpDescNewVar(argname.outlinks, {"out0"}, op_desc.add_outputs());

    // set pre-states
    auto pre_memories = op_desc.mutable_attrs()->Add();
    pre_memories->set_name(argname.ex_states);
    pre_memories->set_type(paddle::framework::AttrType::STRINGS);
    auto pre_memories_item = pre_memories->add_strings();
    *pre_memories_item = "mem@pre";

    // set states
    auto memories = op_desc.mutable_attrs()->Add();
    memories->set_name(argname.states);
    memories->set_type(paddle::framework::AttrType::STRINGS);
    auto memories_item = memories->add_strings();
    *memories_item = "mem";
    return op_desc;
  }

  void CreateGlobalVariables() {
    platform::CPUPlace place;
    scope.Var("step_scopes");
    CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
    CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
    auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
    // 10 instances with 4 sentences, length is 4, 3, 2, 1 respectively.
    framework::LoD in0_lod(1);
    for (int x : std::vector<int>{0, 4, 7, 9, 10}) {
      in0_lod[0].push_back(x);
    }
    in0->set_lod(in0_lod);
    in0->Resize(framework::make_ddim({10, 8}));
    // set the content, each sentence content is seqid.batchid
    // the seqid starts from 0
    int start = 0;
    for (size_t seqid = 0; seqid < in0_lod.size() - 1; seqid++) {
      for (size_t batchid = 0;
           batchid < in0_lod[0][seqid + 1] - in0_lod[0][seqid]; batchid++) {
        float v = seqid + batchid * 0.1;

        for (size_t dim = 0; dim < 8; dim++) {
          in0->data<float>()[start * 8 + dim] = v;
        }
        start++;
      }
    }
  }

  void InitCacheManually() {
    dop->cache_.Init(RNNAlgorithm::kArgNames[0], *op, scope, &device_context,
                     &dop->arg_);
  }

  void InitStepNet() {
    std::unique_ptr<framework::OperatorBase> stepnet{new NetOp};
    dynamic_cast<NetOp*>(stepnet.get())
        ->AppendOp(std::unique_ptr<TestOp>(new TestOp(
            "test", {{"inputs", {"in0"}}, {"initial_states", {"boot_mem"}}},
            {{"outputs", {"out0"}}, {"step_scopes", {"step_scopes"}}}, {})));
    dop->SetStepUnit(std::move(stepnet));
  }

 protected:
  RNNAlgorithm* dop;
  std::unique_ptr<framework::OperatorBase> op;
  paddle::platform::CPUDeviceContext device_context;
  paddle::framework::Scope scope;
};

TEST_F(RNNAlgorithmTestHelper, CreateCache) {
  const rnn::Argument& arg = dop->arg_;
  ASSERT_EQ(arg.inlinks.size(), 1UL);
  ASSERT_EQ(arg.outlinks.size(), 1UL);
}

TEST_F(RNNAlgorithmTestHelper, SplitInputs) {
  dop->SplitInputs();
  auto& in0_ta = dop->step_inputs_["in0"];
  ASSERT_EQ(in0_ta.size(), 4UL);

  const auto& batch0 = in0_ta.Read(0);
  const auto& batch1 = in0_ta.Read(1);
  const auto& batch2 = in0_ta.Read(2);
  const auto& batch3 = in0_ta.Read(3);
  EXPECT_EQ(batch0.dims()[0], 4);
  EXPECT_EQ(batch1.dims()[0], 3);
  EXPECT_EQ(batch2.dims()[0], 2);
  EXPECT_EQ(batch3.dims()[0], 1);
}

TEST_F(RNNAlgorithmTestHelper, CreateScopes) {
  dop->SplitInputs();
  dop->CreateScopes();
  ASSERT_EQ(dop->cache_.num_steps, 4UL);
  ASSERT_EQ(dop->cache_.scopes->size(), 4UL);
}

TEST_F(RNNAlgorithmTestHelper, WriteStepInputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    for (auto name : std::vector<std::string>({"in0"})) {
      ASSERT_TRUE(scope.FindVar(name) != nullptr);
    }
  }
}

TEST_F(RNNAlgorithmTestHelper, WriteStepOutputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    for (auto name : std::vector<std::string>({"out0"})) {
      ASSERT_TRUE(scope.FindVar(name));
    }
  }
}

TEST_F(RNNAlgorithmTestHelper, ConcatOutputs) {
  // Let's leave this test to python unittest.
}

TEST_F(RNNAlgorithmTestHelper, InitStates) {
  dop->SetComputeMode(RNNAlgorithm::ComputeMode::kForward);
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();
  dop->InitStates();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    auto state = scope.FindVar("mem");
    ASSERT_TRUE(state != nullptr);

    auto* pre_state = scope.FindVar("mem@pre");
    ASSERT_TRUE(pre_state != nullptr);

    auto* boot_state = scope.FindVar("boot_mem");
    ASSERT_TRUE(boot_state != nullptr);
  }
}

}  // namespace operators
}  // namespace paddle
Some files were not shown because too many files have changed in this diff.