Feature/copytensor (#5455)
* "make global tensor function independently" * "replace functor" * "fix inline template error" * "fix tensor array with CopyFrom" * "fix other case use CopyFrom" * "move the op interface hardly" * "fix operators" * "fix typo" * "delete dynamic recurrent rnn and fix gru_unit in debugmode" * "fix unique_ptr copy" * "fix cuda copy" * "fix namespace error" * "removed nccl python test" * "fix include error" * "fix typo" * fix copy util testrelease/0.11.0
parent 748fdbbec5
commit 45062fe5d7
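This commit replaces the TensorArray-based dynamic RNN machinery with free-function tensor copy utilities declared in paddle/framework/tensor_util.h (added below). As a rough, illustrative sketch only -- the function names and signatures are taken from that header, while the example function itself is hypothetical and assumes a Paddle build with the framework headers available -- the new interface is used like this:

#include <vector>

#include "paddle/framework/ddim.h"
#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor_util.h"
#include "paddle/platform/device_context.h"

// Hypothetical helper, not part of the commit: copy a std::vector into a
// Tensor, then copy that Tensor to another CPU Tensor with the new
// free functions CopyFromVector and CopyFrom.
void CopyTensorSketch() {
  paddle::framework::Tensor src, dst;
  paddle::platform::CPUPlace place;
  paddle::platform::CPUDeviceContext ctx(place);

  std::vector<int> values = {1, 2, 3, 4};
  paddle::framework::CopyFromVector<int>(values, ctx, &src);  // src becomes a 1-D tensor of 4 ints
  src.Resize(paddle::framework::make_ddim({2, 2}));           // reshape in place

  paddle::framework::CopyFrom(src, place, ctx, &dst);         // dst gets its own copy of the data
}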
File diff suppressed because it is too large
@@ -1,132 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <vector>

#include "paddle/framework/lod_tensor.h"

namespace paddle {
namespace framework {

/*
 * DySeqMeta stores indices of the basic element in tensor. It is used
 * after a LoD-tensor's re-assembling; its info can be used to recover the
 * order in the original LoD-tensor.
 */
struct DySeqMeta {
  DySeqMeta(size_t begin, size_t end, size_t ori_idx)
      : begin(begin), end(end), ori_idx(ori_idx) {}

  size_t begin;
  size_t end;  // not included
  size_t ori_idx;
};

using DySeqMetaBatch = std::vector<DySeqMeta>;

/*
 * Extract the indices of instances.
 */
std::vector<size_t> GenDyBatchIndice(const DySeqMetaBatch &metas, int batch_id);

/*
 * TensorArray is a C-array-like array of tensors, it is meant to be used with
 * dynamic iteration primitives such as while_loop. It is used to segment
 * inputs and store states in all time steps.
 *
 * By providing some methods similar to a C++ array, the definition of some
 * state-based dynamic models such as RNN could be more natural and highly
 * flexible.
 */
class TensorArray {
 public:
  using value_type = float;

  // max number of values allowed to store.
  const size_t MAX_SIZE{100000};

  /*
   * Read the value at location `index` in the `TensorArray`.
   */
  const LoDTensor &Read(size_t index) const;

  /*
   * Write value into the index of the TensorArray.
   */
  void Write(size_t index, const LoDTensor &value);

  /*
   * Write value into the index of the TensorArray, with memory shared.
   */
  void WriteShared(size_t index, const LoDTensor &value);

  /*
   * Recover the original LoD-arranged LoDTensor with the `values`, `level` and
   * `indice_map`.
   */
  LoDTensor Pack(size_t level, const DySeqMetaBatch &meta,
                 const LoD &lod) const;

  /*
   * Split LoDTensor in some `level` and write the generated batches to
   * `values`; if `desend` is set, sort by length in descending order,
   * otherwise in ascending order.
   */
  DySeqMetaBatch Unpack(const LoDTensor &source, int level, bool length_desend);

  /*
   * Pack an array of LoDTensors to a LoDTensor.
   */
  LoDTensor LodPack(size_t level) const;

  /*
   * Unpack a LoDTensor to an array of LoDTensors.
   */
  void LodUnpack(const LoDTensor &source, size_t level);

  /*
   * Pack the values into a tensor with rank one higher than each tensor in
   * values.
   */
  LoDTensor Stack() const;

  /*
   * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors.
   */
  void Unstack(const LoDTensor &source) const;

  /*
   * Unstacks the given division of a rank-`R` tensor into rank-`(R-1)` tensors,
   * with memory of tensors shared.
   */
  void UnstackShared(const LoDTensor &source) const;

  /*
   * Return the number of values.
   */
  size_t size() const;

 protected:
  void Unstack(const LoDTensor &source, bool data_shared) const;

  LoDTensor LodPackTwo(const LoDTensor &pre, const LoDTensor &cur,
                       size_t level) const;

 private:
  mutable std::vector<LoDTensor> values_;
};  // class TensorArray

}  // namespace framework
}  // namespace paddle
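For reference, the class comment above describes a per-time-step container; a minimal, illustrative sketch of the lifecycle it implies (Unstack a batch, Read/Write individual steps, Stack back), mirroring the unit test that follows. The example function is hypothetical and assumes the pre-removal framework headers:

#include "paddle/framework/tensor_array.h"

// Hypothetical usage sketch for the (now removed) TensorArray.
void TensorArraySketch() {
  using namespace paddle::framework;

  LoDTensor source;
  source.Resize(make_ddim({16, 32}));
  source.mutable_data<int>(paddle::platform::CPUPlace());

  TensorArray ta;
  ta.Unstack(source);              // 16 tensors of shape {1, 32}

  LoDTensor step = ta.Read(2);     // read one time step
  ta.Write(2, step);               // overwrite a step
  LoDTensor packed = ta.Stack();   // re-assemble into a {16, 32} tensor
}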
@@ -1,182 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/tensor_array.h"

#include <gtest/gtest.h>

namespace paddle {
namespace framework {

class TensorArrayTester : public ::testing::Test {
 protected:
  void SetUp() override {
    LoDTensor source;
    source.Resize(make_ddim({batch_size, dim}));
    int* data = source.mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < 16 * 32; i++) {
      data[i] = i;
    }
    ta.Unstack(source);
  }

  TensorArray ta;
  const int batch_size = 16;
  const int dim = 32;
};

TEST_F(TensorArrayTester, Read) {
  for (int i = 0; i < batch_size; i++) {
    const auto& tensor = ta.Read(i);
    ASSERT_EQ(tensor.dims()[0], 1);
    ASSERT_EQ(tensor.dims()[1], dim);
  }
}

TEST_F(TensorArrayTester, Write) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  for (int i = 0; i < dim; i++) {
    *(source.mutable_data<int>(platform::CPUPlace()) + i) = i;
  }

  ta.Write(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }
}

TEST_F(TensorArrayTester, WriteShared) {
  LoDTensor source;
  source.Resize(make_ddim({1, dim}));
  for (int i = 0; i < dim; i++) {
    *(source.mutable_data<int>(platform::CPUPlace()) + i) = i;
  }

  ta.WriteShared(2, source);

  const auto& tensor = ta.Read(2);
  for (int i = 0; i < dim; i++) {
    EXPECT_EQ(*(tensor.data<int>() + i), *(source.data<int>() + i));
  }

  EXPECT_EQ(source.data<int>(), tensor.data<int>());
}

class TensorArrayPackTester : public ::testing::Test {
 protected:
  virtual void SetUp() override {
    lod.push_back(std::vector<size_t>{0, 2, 9, 13});

    source.set_lod(lod);
    source.Resize(make_ddim({13, 128}));
    source.mutable_data<int>(platform::CPUPlace());

    // content of each sentence: 0 1 2 3 4
    const auto& level = lod.front();
    for (size_t i = 0; i < level.size() - 1; i++) {
      size_t begin = level[i];
      size_t end = level[i + 1];
      for (size_t j = begin; j < end; j++) {
        auto record = source.Slice(j, j + 1);
        for (int dim = 0; dim < 128; dim++) {
          record.mutable_data<int>(platform::CPUPlace())[dim] = j - begin;
        }
      }
    }

    // unpack
    meta = ta.Unpack(source, 0, true);
  }

  LoD lod;
  TensorArray ta;
  LoDTensor source;
  std::vector<DySeqMeta> meta;
};

TEST_F(TensorArrayPackTester, Unpack) {
  ASSERT_EQ(ta.size(), 7UL);

  const auto& t0 = ta.Read(0);
  const auto& t1 = ta.Read(1);

  ASSERT_EQ(t0.data<int>()[0], int(0));
  ASSERT_EQ(t1.data<int>()[0], int(1));
}

TEST_F(TensorArrayPackTester, Pack) {
  LoDTensor packed = ta.Pack(0, meta, lod);
}

TEST_F(TensorArrayTester, size) {
  ASSERT_EQ(ta.size(), static_cast<size_t>(batch_size));
}

TEST(TensorArray, LodPack) {
  // three time steps, each step stores a LoDTensor
  // - [0] [1]
  // - [2 3], [4 5]
  // - [6 7] [] [8], [9, 10]
  // try to get a LoDTensor with content:
  // - [0 2 6]
  // - [0 2 7]
  // - [0 3]
  // - [1 4 8]
  // - [1 5 9]
  // - [1 5 10]
  std::array<LoDTensor, 3> tensors;
  tensors[0].Resize(make_ddim({2, 1}));
  tensors[1].Resize(make_ddim({4, 1}));
  tensors[2].Resize(make_ddim({5, 1}));
  int index = 0;
  for (auto& t : tensors) {
    t.mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < t.dims()[0]; i++) {
      t.data<int>()[i] = index;
      index++;
    }
  }

  std::array<LoD, 3> lods;
  std::vector<std::vector<size_t>> levels{
      {0, 1, 2}, {0, 2, 4}, {0, 2, 2, 3, 5}};
  for (int i = 0; i < 3; i++) {
    lods[i].emplace_back(levels[i].begin(), levels[i].end());
  }

  TensorArray ta;
  for (int i = 0; i < 3; i++) {
    tensors[i].set_lod(lods[i]);
    ta.Write(i, tensors[i]);
  }

  auto merged = ta.LodPack(0);

  std::vector<int> target_tensor_data{{0, 2, 6,  // 0
                                       0, 2, 7,  // 1
                                       0, 3,     // 2
                                       1, 4, 8,  // 3
                                       1, 5, 9,  // 4
                                       1, 5, 10}};
  EXPECT_EQ(merged.dims()[0], (int)target_tensor_data.size());
  for (size_t i = 0; i < target_tensor_data.size(); i++) {
    EXPECT_EQ(target_tensor_data[i], merged.data<int>()[i]);
  }
}

}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,153 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/framework/tensor.h"

namespace paddle {
namespace framework {

/**
 * @brief   Copy the content of an external tensor to a new place.
 *
 * @param[in] src        The external tensor.
 * @param[in] dst_place  The dst place.
 * @param[in] ctx        The device context that contains device resources.
 * @param[out] dst       The tensor that receives the copy.
 *
 * @note    CopyFrom supports CPU <-> GPU, GPU <-> GPU.
 */
inline void CopyFrom(const Tensor& src, const platform::Place& dst_place,
                     const platform::DeviceContext& ctx, Tensor* dst) {
  src.check_memory_size();

  dst->Resize(src.dims());
  auto src_place = src.place();
  auto src_ptr = src.data<void>();

  auto dst_ptr = dst->mutable_data(dst_place, src.type());

  auto size = src.numel() * SizeOfType(src.type());

  if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src_place) &&  // NOLINT
           platform::is_cpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
    auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  } else if (platform::is_cpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  } else if (platform::is_gpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
    auto src_gpu_place = boost::get<platform::GPUPlace>(src_place);
    auto dst_gpu_place = boost::get<platform::GPUPlace>(dst_place);
    auto ctx_place = ctx.GetPlace();
    PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
    auto ctx_gpu_place = boost::get<platform::GPUPlace>(ctx_place);
    PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
    memory::Copy(
        dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
}

/**
 * @brief   Copy the content of an external vector to a tensor.
 *
 * @param[in] src  The external vector.
 * @param[in] ctx  The device context that contains device resources.
 * @param[out] dst The destination tensor.
 *
 * @note    CopyFromVector resizes dst to a 1-D tensor of src.size() elements
 *          before copying.
 */
template <typename T>
inline void CopyFromVector(const std::vector<T>& src,
                           const platform::DeviceContext& ctx, Tensor* dst) {
  auto dst_place = ctx.GetPlace();
  auto src_ptr = static_cast<const void*>(src.data());
  platform::CPUPlace src_place;
  dst->Resize({static_cast<int64_t>(src.size())});
  auto dst_ptr = static_cast<void*>(dst->mutable_data<T>(dst_place));
  auto size = src.size() * sizeof(T);

  if (platform::is_cpu_place(dst_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, src_place,
                 src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(dst_place)) {  // NOLINT
    memory::Copy(
        boost::get<platform::GPUPlace>(dst_place), dst_ptr, src_place, src_ptr,
        size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
}

/**
 * @brief   Copy the content of a tensor to a vector.
 *
 * @param[in] src  The external tensor.
 * @param[in] ctx  The device context that contains device resources.
 * @param[out] dst The destination vector.
 *
 * @note    CopyToVector resizes dst to hold src.numel() elements before
 *          copying.
 */
template <typename T>
inline void CopyToVector(const Tensor& src, const platform::DeviceContext& ctx,
                         std::vector<T>* dst) {
  auto src_ptr = static_cast<const void*>(src.data<T>());
  auto size = src.numel() * sizeof(T);

  platform::CPUPlace dst_place;
  dst->resize(src.numel());
  auto dst_ptr = static_cast<void*>(dst->data());

  if (platform::is_cpu_place(src.place())) {
    memory::Copy(dst_place, dst_ptr,
                 boost::get<platform::CPUPlace>(src.place()), src_ptr, size);
  }
#ifdef PADDLE_WITH_CUDA
  else if (platform::is_gpu_place(src.place())) {  // NOLINT
    memory::Copy(
        dst_place, dst_ptr, boost::get<platform::GPUPlace>(src.place()),
        src_ptr, size,
        reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream());
  }
#endif
}

}  // namespace framework
}  // namespace paddle
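One behavioral note on the header above: on the CUDA branches, memory::Copy is issued on ctx.stream(), so copies involving the GPU are asynchronous with respect to the host; that is why the tests below call gpu_ctx.Wait() before comparing data. A hedged sketch of a caller doing the same (assumes PADDLE_WITH_CUDA and a visible GPU device 0; the example function is not part of the commit):

#include <vector>

#include "paddle/framework/tensor.h"
#include "paddle/framework/tensor_util.h"

#ifdef PADDLE_WITH_CUDA
void GpuRoundTripSketch() {
  paddle::platform::GPUPlace gpu_place(0);
  paddle::platform::CUDADeviceContext gpu_ctx(gpu_place);

  // Host -> device, then device -> host.
  std::vector<int> src = {1, 2, 3, 4};
  paddle::framework::Tensor gpu_tensor;
  paddle::framework::CopyFromVector<int>(src, gpu_ctx, &gpu_tensor);

  std::vector<int> dst;
  paddle::framework::CopyToVector<int>(gpu_tensor, gpu_ctx, &dst);

  // Both copies were enqueued on gpu_ctx's stream; synchronize before
  // reading dst on the host.
  gpu_ctx.Wait();
}
#endif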
@@ -0,0 +1,228 @@
/*
  Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
*/

#include "paddle/framework/tensor_util.h"
#include <gtest/gtest.h>
#include <string>

namespace paddle {
namespace framework {
TEST(CopyFrom, Tensor) {
  Tensor src_tensor;
  Tensor dst_tensor;
  platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));

  int* src_ptr =
      src_tensor.mutable_data<int>(make_ddim({3, 3}), platform::CPUPlace());

  int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  memcpy(src_ptr, arr, 9 * sizeof(int));

  auto cpu_place = new platform::CPUPlace();
  CopyFrom(src_tensor, *cpu_place, cpu_ctx, &dst_tensor);

  const int* dst_ptr = dst_tensor.data<int>();
  ASSERT_NE(src_ptr, dst_ptr);
  for (size_t i = 0; i < 9; ++i) {
    EXPECT_EQ(src_ptr[i], dst_ptr[i]);
  }

  Tensor slice_tensor = src_tensor.Slice(1, 2);
  CopyFrom(slice_tensor, *cpu_place, cpu_ctx, &dst_tensor);
  const int* slice_ptr = slice_tensor.data<int>();
  dst_ptr = dst_tensor.data<int>();
  ASSERT_NE(dst_ptr, slice_ptr);
  for (size_t i = 0; i < 3; ++i) {
    EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
  }
#ifdef PADDLE_WITH_CUDA
  {
    Tensor src_tensor;
    Tensor gpu_tensor;
    Tensor dst_tensor;

    int* src_ptr =
        src_tensor.mutable_data<int>(make_ddim({3, 3}), platform::CPUPlace());

    int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    memcpy(src_ptr, arr, 9 * sizeof(int));

    // CPU Tensor to GPU Tensor
    auto gpu_place = new platform::GPUPlace(0);
    platform::CUDADeviceContext gpu_ctx(*gpu_place);
    CopyFrom(src_tensor, *gpu_place, gpu_ctx, &gpu_tensor);

    // GPU Tensor to CPU Tensor
    auto cpu_place = new platform::CPUPlace();
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    const int* dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    Tensor slice_tensor = src_tensor.Slice(1, 2);

    // CPU Slice Tensor to GPU Tensor
    CopyFrom(slice_tensor, *gpu_place, gpu_ctx, &gpu_tensor);

    // GPU Tensor to CPU Tensor
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Slice Tensors
    gpu_ctx.Wait();
    const int* slice_ptr = slice_tensor.data<int>();
    dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(dst_ptr, slice_ptr);
    for (size_t i = 0; i < 3; ++i) {
      EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
    }
  }
#endif
}

TEST(CopyFromVector, Tensor) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor cpu_tensor;

    // Copy to CPU Tensor
    cpu_tensor.Resize(make_ddim({3, 3}));
    auto cpu_place = new paddle::platform::CPUPlace();
    CPUDeviceContext cpu_ctx(*cpu_place);
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);

    // Compare Tensors
    const int* cpu_ptr = cpu_tensor.data<int>();
    const int* src_ptr = src_vec.data();
    ASSERT_NE(src_ptr, cpu_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
    }

    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);
    cpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
    cpu_ptr = cpu_tensor.data<int>();
    src_ptr = src_vec.data();
    ASSERT_NE(src_ptr, cpu_ptr);
    for (size_t i = 0; i < 5; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
    }

    delete cpu_place;
  }

#ifdef PADDLE_WITH_CUDA
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor cpu_tensor;
    Tensor gpu_tensor;
    Tensor dst_tensor;

    // Copy to CPU Tensor
    cpu_tensor.Resize(make_ddim({3, 3}));
    auto cpu_place = new paddle::platform::CPUPlace();
    CPUDeviceContext cpu_ctx(*cpu_place);
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);

    // Copy to GPUTensor
    gpu_tensor.Resize(make_ddim({3, 3}));
    auto gpu_place = new paddle::platform::GPUPlace();
    CUDADeviceContext gpu_ctx(*gpu_place);
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
    // Copy from GPU to CPU tensor for comparison
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    const int* src_ptr = src_vec.data();
    const int* cpu_ptr = cpu_tensor.data<int>();
    const int* dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, cpu_ptr);
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 9; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    src_vec.erase(src_vec.begin(), src_vec.begin() + 5);

    cpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, cpu_ctx, &cpu_tensor);
    gpu_tensor.Resize(make_ddim({2, 2}));
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);
    CopyFrom(gpu_tensor, *cpu_place, gpu_ctx, &dst_tensor);

    // Sync before Compare Tensors
    gpu_ctx.Wait();
    src_ptr = src_vec.data();
    cpu_ptr = cpu_tensor.data<int>();
    dst_ptr = dst_tensor.data<int>();
    ASSERT_NE(src_ptr, cpu_ptr);
    ASSERT_NE(src_ptr, dst_ptr);
    for (size_t i = 0; i < 5; ++i) {
      EXPECT_EQ(src_ptr[i], cpu_ptr[i]);
      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
    }

    delete cpu_place;
    delete gpu_place;
  }
#endif
}

TEST(CopyToVector, Tensor) {
  using namespace paddle::framework;
  using namespace paddle::platform;
  {
    Tensor src;
    int* src_ptr = src.mutable_data<int>({3, 3}, CPUPlace());
    for (int i = 0; i < 3 * 3; ++i) {
      src_ptr[i] = i;
    }

    CPUPlace place;
    CPUDeviceContext cpu_ctx(place);
    std::vector<int> dst;
    CopyToVector<int>(src, cpu_ctx, &dst);

    for (int i = 0; i < 3 * 3; ++i) {
      EXPECT_EQ(src_ptr[i], dst[i]);
    }
  }
#ifdef PADDLE_WITH_CUDA
  {
    std::vector<int> src_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    Tensor gpu_tensor;
    GPUPlace place;
    CUDADeviceContext gpu_ctx(place);
    CopyFromVector<int>(src_vec, gpu_ctx, &gpu_tensor);

    std::vector<int> dst;
    CopyToVector<int>(gpu_tensor, gpu_ctx, &dst);

    for (int i = 0; i < 3 * 3; ++i) {
      EXPECT_EQ(src_vec[i], dst[i]);
    }
  }
#endif
}

}  // namespace framework
}  // namespace paddle
File diff suppressed because it is too large
@@ -1,233 +0,0 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#ifdef PADDLE_WITH_TESTING
#include "gtest/gtest.h"
#endif

#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/operator.h"
#include "paddle/framework/tensor_array.h"
#include "paddle/framework/variable.h"
#include "paddle/operators/rnn/recurrent_op_utils.h"

namespace paddle {
namespace operators {

class RNNAlgorithm {
 public:
  enum ComputeMode { kForward = 0, kBackward = 1 };
  static const std::array<rnn::ArgumentName, 2> kArgNames;
  using value_type = float;

  /*
   * Different `Run` methods for forward and backward; `_` is just for template
   * specialization.
   */
  template <ComputeMode _>
  void Run(const framework::Scope& scope, const framework::OperatorBase& op,
           const platform::DeviceContext& dev_ctx);
  /*
   * Split the inputs (LoDTensors) into segments for each time step.
   */
  void SplitInputs();

  /*
   * Create step-scopes to store temporary outputs in each time step.
   */
  void CreateScopes();

  /*
   * Link TensorArray steps to the corresponding variables located in
   * step-scopes.
   */
  void WriteStepInputs();

  /*
   * Write the output of each step to the corresponding TensorArray.
   */
  void WriteStepOutputs();

  /*
   * Initialize the states; each state will have a corresponding pre-state,
   * which shares the memory with the state in the previous time step. The
   * pre-state in the first time step will be initialized with a zero tensor,
   * or with a tensor in the parent scope if one is provided.
   */
  void InitStates();

  /*
   * Create state variables for each time step.
   */
  void CreateState(const rnn::StateAttr& state, size_t step);

  /*
   * Link the pre-state variable in the current scope to the state variable in
   * the previous time step (scope) by reference.
   */
  void LinkState(const rnn::StateAttr& state, size_t step);

  /*
   * Link the pre-state of the first time step to the `boot-state` in the
   * parent's scope.
   */
  void LinkInitialState(const rnn::StateAttr& state);

  /*
   * Copy the gradient from `pre-state` in the first step-scope to the
   * `boot-state` in the parent's scope.
   */
  void ExportInitialStateGradient(const rnn::StateAttr& state);

  /*
   * Calculate time steps.
   */
  void RunSteps();

  /*
   * Concatenate outputs in each time step and generate a LoDTensor.
   */
  void ConcatOutputs();

  void SetComputeMode(ComputeMode mode) { mode_ = mode; }
  bool IsForward() const { return mode_ == ComputeMode::kForward; }
  bool IsBackward() const { return mode_ == ComputeMode::kBackward; }

  /*
   * Set a step unit that is created according to a RecurrentOp's step unit.
   */
  void SetStepUnit(std::unique_ptr<framework::OperatorBase> step_unit) {
    PADDLE_ENFORCE_NOT_NULL(step_unit);
    step_unit_ = std::move(step_unit);
  }
  const framework::OperatorBase& GetStepUnit() const { return *step_unit_; }

  const framework::TensorArray& state(const std::string& name) const {
    auto it = states_.find(name);
    PADDLE_ENFORCE(it != states_.end());
    return it->second;
  }
  const framework::TensorArray& step_input(const std::string& name) const {
    auto it = step_inputs_.find(name);
    PADDLE_ENFORCE(it != step_inputs_.end());
    return it->second;
  }
  const framework::TensorArray& step_output(const std::string& name) const {
    auto it = step_outputs_.find(name);
    PADDLE_ENFORCE(it != step_outputs_.end());
    return it->second;
  }

 protected:
  struct ArgCache {
    framework::Scope const* scope;
    std::vector<framework::Scope*>* scopes;
    std::map<std::string, framework::Variable*> inputs;
    std::map<std::string, framework::Variable*> outputs;
    platform::DeviceContext const* dev_ctx;

    size_t num_steps{0};

    void Init(const rnn::ArgumentName& name, const framework::OperatorBase& op,
              const framework::Scope& scope,
              platform::DeviceContext const* dev_ctx, rnn::Argument* arg);

    framework::Scope& GetScope(size_t index) {
      PADDLE_ENFORCE_LT(index, num_steps);
      return *scopes->at(index);
    }

    framework::LoDTensor* GetTensor(const framework::Scope& scope,
                                    const std::string& name);

   private:
    void InitArgument(const rnn::ArgumentName& name,
                      const framework::OperatorBase& op, rnn::Argument* arg);
    void CacheScopes(const framework::Scope& scope, const rnn::Argument& arg);
    void CacheInlinks(const framework::Scope& scope,
                      const std::vector<std::string>& names);
    void CacheOutlinks(const framework::Scope& scope,
                       const std::vector<std::string>& names);
    framework::Variable* GetVariable(const framework::Scope& scope,
                                     const std::string& name);
  };

 private:
  std::unique_ptr<framework::OperatorBase> step_unit_;
  std::map<std::string, framework::TensorArray> states_;
  std::map<std::string, framework::TensorArray> step_inputs_;
  std::map<std::string, framework::TensorArray> step_outputs_;
  std::map<std::string, std::vector<framework::DySeqMeta>> dy_seq_metas_;
  rnn::Argument arg_;
  ArgCache cache_;
  ComputeMode mode_{ComputeMode::kForward};

#ifdef PADDLE_WITH_TESTING
  // test forward
  friend class RNNAlgorithmTestHelper;
  FRIEND_TEST(RNNAlgorithmTestHelper, SplitInputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, CreateCache);
  FRIEND_TEST(RNNAlgorithmTestHelper, CreateScopes);
  FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepInputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, WriteStepOutputs);
  FRIEND_TEST(RNNAlgorithmTestHelper, InitStates);
  FRIEND_TEST(RNNAlgorithmTestHelper, ConcatOutputs);
  // TODO(superjom) test backward
#endif
};

class DynamicRecurrentOp : public framework::OperatorBase {
 public:
  DynamicRecurrentOp(const std::string& type,
                     const framework::VariableNameMap& inputs,
                     const framework::VariableNameMap& outputs,
                     const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  DynamicRecurrentOp(const DynamicRecurrentOp& o)
      : framework::OperatorBase(
            static_cast<const framework::OperatorBase&>(o)) {
    PADDLE_THROW("Not implemented");
  }

  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override;

  mutable RNNAlgorithm rnn;
};

class DynamicRecurrentGradientOp : public framework::OperatorBase {
 public:
  DynamicRecurrentGradientOp(const std::string& type,
                             const framework::VariableNameMap& inputs,
                             const framework::VariableNameMap& outputs,
                             const framework::AttributeMap& attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}

  DynamicRecurrentGradientOp(const DynamicRecurrentGradientOp& o)
      : framework::OperatorBase(
            static_cast<const framework::OperatorBase&>(o)) {
    PADDLE_THROW("Not implemented");
  }

  void Run(const framework::Scope& scope,
           const platform::DeviceContext& dev_ctx) const override;

  mutable RNNAlgorithm rnn;
};

}  // namespace operators
}  // namespace paddle
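The step methods declared in the header above are intended to be chained; the unit test below exercises them in exactly this order. A hedged sketch of the forward sequencing (illustrative only -- the real Run<kForward> body may differ, and the sketch function is not part of the commit):

// Illustrative forward pipeline for RNNAlgorithm, mirroring the call order
// used by RNNAlgorithmTestHelper below.
void ForwardPipelineSketch(paddle::operators::RNNAlgorithm* rnn) {
  rnn->SetComputeMode(paddle::operators::RNNAlgorithm::ComputeMode::kForward);
  rnn->SplitInputs();       // segment the LoDTensor inputs per time step
  rnn->CreateScopes();      // one step-scope per time step
  rnn->WriteStepInputs();   // link TensorArray steps into the step-scopes
  rnn->WriteStepOutputs();  // prepare per-step output slots
  rnn->InitStates();        // wire pre-states to previous/boot states
  rnn->RunSteps();          // run the step unit for every time step
  rnn->ConcatOutputs();     // merge step outputs back into a LoDTensor
}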
@@ -1,217 +0,0 @@
#include "paddle/operators/dynamic_recurrent_op.h"

#include <gtest/gtest.h>

#include "paddle/framework/ddim.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_desc.h"
#include "paddle/framework/op_registry.h"
#include "paddle/operators/net_op.h"

namespace paddle {
namespace operators {

using framework::Scope;
using framework::TensorArray;
using framework::LoDTensor;
using framework::Variable;

class TestOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;
  DEFINE_OP_CLONE_METHOD(TestOp);
  void Run(const Scope& scope,
           const platform::DeviceContext& dev_ctx) const override {}
};

void OpDescNewVar(const std::string& param_name,
                  std::initializer_list<const char*> arguments,
                  paddle::framework::OpDesc::Var* var) {
  var->set_parameter(param_name);
  for (auto& arg_name : arguments) {
    var->add_arguments(arg_name);
  }
}

// create a LoD tensor in scope with specific dims
LoDTensor* CreateVar(Scope& scope, std::string name, framework::DDim dims,
                     const platform::Place& place) {
  auto* var = scope.Var(name);
  auto* tensor = var->GetMutable<LoDTensor>();
  tensor->Resize(dims);
  tensor->mutable_data<float>(place);
  return tensor;
}

class RNNAlgorithmTestHelper : public ::testing::Test {
 protected:
  const rnn::ArgumentName argname = RNNAlgorithm::kArgNames[0];

  virtual void SetUp() override {
    CreateGlobalVariables();

    auto op_desc = CreateOpDesc();
    op = paddle::framework::OpRegistry::CreateOp(op_desc);
    dop = &(dynamic_cast<DynamicRecurrentOp*>(op.get())->rnn);
    InitCacheManually();
    InitStepNet();
  }

  framework::OpDesc CreateOpDesc() {
    // create op
    paddle::framework::OpDesc op_desc;
    op_desc.set_type("dynamic_recurrent");

    OpDescNewVar(argname.inlinks, {"in0"}, op_desc.add_inputs());
    OpDescNewVar(argname.initial_states, {"boot_mem"}, op_desc.add_inputs());
    OpDescNewVar(argname.step_scopes, {"step_scopes"}, op_desc.add_outputs());
    OpDescNewVar(argname.outlinks, {"out0"}, op_desc.add_outputs());

    // set pre-states
    auto pre_memories = op_desc.mutable_attrs()->Add();
    pre_memories->set_name(argname.ex_states);
    pre_memories->set_type(paddle::framework::AttrType::STRINGS);
    auto pre_memories_item = pre_memories->add_strings();
    *pre_memories_item = "mem@pre";

    // set states
    auto memories = op_desc.mutable_attrs()->Add();
    memories->set_name(argname.states);
    memories->set_type(paddle::framework::AttrType::STRINGS);
    auto memories_item = memories->add_strings();
    *memories_item = "mem";
    return op_desc;
  }

  void CreateGlobalVariables() {
    platform::CPUPlace place;
    scope.Var("step_scopes");
    CreateVar(scope, "boot_mem", framework::make_ddim({10, 20}), place);
    CreateVar(scope, "out0", framework::make_ddim({10, 20}), place);
    auto* in0 = CreateVar(scope, "in0", framework::make_ddim({10, 8}), place);
    // 10 instances in 4 sentences, with lengths 4, 3, 2, 1 respectively.
    framework::LoD in0_lod(1);
    for (int x : std::vector<int>{0, 4, 7, 9, 10}) {
      in0_lod[0].push_back(x);
    }
    in0->set_lod(in0_lod);
    in0->Resize(framework::make_ddim({10, 8}));
    // set the content; each sentence's content is seqid.batchid
    // the seqid starts from 0
    int start = 0;
    for (size_t seqid = 0; seqid < in0_lod.size() - 1; seqid++) {
      for (size_t batchid = 0;
           batchid < in0_lod[0][seqid + 1] - in0_lod[0][seqid]; batchid++) {
        float v = seqid + batchid * 0.1;

        for (size_t dim = 0; dim < 8; dim++) {
          in0->data<float>()[start * 8 + dim] = v;
        }
        start++;
      }
    }
  }

  void InitCacheManually() {
    dop->cache_.Init(RNNAlgorithm::kArgNames[0], *op, scope, &device_context,
                     &dop->arg_);
  }

  void InitStepNet() {
    std::unique_ptr<framework::OperatorBase> stepnet{new NetOp};
    dynamic_cast<NetOp*>(stepnet.get())
        ->AppendOp(std::unique_ptr<TestOp>(new TestOp(
            "test", {{"inputs", {"in0"}}, {"initial_states", {"boot_mem"}}},
            {{"outputs", {"out0"}}, {"step_scopes", {"step_scopes"}}}, {})));
    dop->SetStepUnit(std::move(stepnet));
  }

 protected:
  RNNAlgorithm* dop;
  std::unique_ptr<framework::OperatorBase> op;
  paddle::platform::CPUDeviceContext device_context;
  paddle::framework::Scope scope;
};

TEST_F(RNNAlgorithmTestHelper, CreateCache) {
  const rnn::Argument& arg = dop->arg_;
  ASSERT_EQ(arg.inlinks.size(), 1UL);
  ASSERT_EQ(arg.outlinks.size(), 1UL);
}

TEST_F(RNNAlgorithmTestHelper, SplitInputs) {
  dop->SplitInputs();
  auto& in0_ta = dop->step_inputs_["in0"];
  ASSERT_EQ(in0_ta.size(), 4UL);

  const auto& batch0 = in0_ta.Read(0);
  const auto& batch1 = in0_ta.Read(1);
  const auto& batch2 = in0_ta.Read(2);
  const auto& batch3 = in0_ta.Read(3);
  EXPECT_EQ(batch0.dims()[0], 4);
  EXPECT_EQ(batch1.dims()[0], 3);
  EXPECT_EQ(batch2.dims()[0], 2);
  EXPECT_EQ(batch3.dims()[0], 1);
}

TEST_F(RNNAlgorithmTestHelper, CreateScopes) {
  dop->SplitInputs();
  dop->CreateScopes();
  ASSERT_EQ(dop->cache_.num_steps, 4UL);
  ASSERT_EQ(dop->cache_.scopes->size(), 4UL);
}

TEST_F(RNNAlgorithmTestHelper, WriteStepInputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    for (auto name : std::vector<std::string>({"in0"})) {
      ASSERT_TRUE(scope.FindVar(name) != nullptr);
    }
  }
}

TEST_F(RNNAlgorithmTestHelper, WriteStepOutputs) {
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    for (auto name : std::vector<std::string>({"out0"})) {
      ASSERT_TRUE(scope.FindVar(name));
    }
  }
}

TEST_F(RNNAlgorithmTestHelper, ConcatOutputs) {
  // Let's leave this test to the Python unittest.
}

TEST_F(RNNAlgorithmTestHelper, InitStates) {
  dop->SetComputeMode(RNNAlgorithm::ComputeMode::kForward);
  dop->SplitInputs();
  dop->CreateScopes();
  dop->WriteStepInputs();
  dop->WriteStepOutputs();
  dop->InitStates();

  for (size_t step = 0; step < dop->cache_.num_steps; step++) {
    auto& scope = dop->cache_.GetScope(step);
    auto state = scope.FindVar("mem");
    ASSERT_TRUE(state != nullptr);

    auto* pre_state = scope.FindVar("mem@pre");
    ASSERT_TRUE(pre_state != nullptr);

    auto* boot_state = scope.FindVar("boot_mem");
    ASSERT_TRUE(boot_state != nullptr);
  }
}

}  // namespace operators
}  // namespace paddle
Some files were not shown because too many files have changed in this diff.