add split and merge lod tensor operator (#5537)

* add split lod tensor operator
* add more test cases
* clean code
* add merge lod tensor operator
* fix bug
* clean code
* add grad operator
* make mask support GPU
* add comments
parent 7c1755d93f · commit f07a226a4f
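For orientation, here is a minimal end-to-end sketch of the two new operators, condensed from the Python test added at the bottom of this commit; the layer wrappers layers.split_lod_tensor / layers.merge_lod_tensor, the Executor/LoDTensor calls, and the level-0 lod values are taken from that test, not from anywhere else:

    import numpy as np
    import paddle.v2.framework.core as core
    import paddle.v2.framework.layers as layers
    from paddle.v2.framework.executor import Executor
    from paddle.v2.framework.framework import Program

    program = Program()
    x = layers.data(name='x', shape=[1], main_program=program)
    y = layers.data(name='y', shape=[1], main_program=program)  # bool mask, one entry per sequence
    x.persistable = True
    y.persistable = True

    out_true, out_false = layers.split_lod_tensor(
        input=x, mask=y, level=0, main_program=program)
    out = layers.merge_lod_tensor(
        in_true=out_true, in_false=out_false, mask=y, x=x, level=0,
        main_program=program)
    for v in (out_true, out_false, out):
        v.persistable = True

    # x holds rows 0..9 with lod [[0, 3, 9, 10]], i.e. three sequences.
    # mask [0, 1, 0]: sequence 1 goes to OutTrue, sequences 0 and 2 to OutFalse.
    tensor = core.LoDTensor()
    tensor.set(np.arange(10).reshape(10, 1).astype('int32'), core.CPUPlace())
    tensor.set_lod([[0, 3, 9, 10]])
    mask = core.LoDTensor()
    mask.set(np.array([[0], [1], [0]]).astype('bool'), core.CPUPlace())

    exe = Executor(core.CPUPlace())
    scope = core.Scope()
    exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope)
    # out_true:  rows 3..8 with lod [[0, 6]]
    # out_false: rows 0, 1, 2, 9 with lod [[0, 3, 4]]
    # out:       the original x, reassembled in order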
@@ -0,0 +1,182 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"

namespace paddle {
namespace operators {

using LoD = framework::LoD;

class MergeLoDTensorOp : public framework::OperatorBase {
 public:
  MergeLoDTensorOp(const std::string &type,
                   const framework::VariableNameMap &inputs,
                   const framework::VariableNameMap &outputs,
                   const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>();
    auto &in_true = scope.FindVar(Input("InTrue"))->Get<framework::LoDTensor>();
    auto &in_false =
        scope.FindVar(Input("InFalse"))->Get<framework::LoDTensor>();
    auto *out =
        scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensor>();
    auto level = static_cast<size_t>(Attr<int>("level"));

    auto &mask_dim = mask.dims();

    std::unique_ptr<framework::LoDTensor> cpu_mask{new framework::LoDTensor()};
    if (platform::is_cpu_place(mask.place())) {
      cpu_mask->ShareDataWith(mask);
    } else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
      cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx);
#else
      PADDLE_THROW("GPU is not supported; please compile with the WITH_GPU option");
#endif
    }
    auto *mask_data = cpu_mask->data<bool>();

    int rank = in_true.dims().size();
    platform::Place place = in_true.place();
    std::type_index data_type = in_true.type();
    framework::DDim in_true_dims =
        framework::slice_ddim(in_true.dims(), 1, rank);

    int64_t batch_size = in_true.dims()[0] + in_false.dims()[0];

    auto in_true_dim_vec = framework::vectorize(in_true_dims);
    in_true_dim_vec.insert(in_true_dim_vec.begin(), batch_size);

    framework::DDim out_dims = framework::make_ddim(in_true_dim_vec);
    out->Resize(out_dims);
    out->mutable_data(place, data_type);

    auto *out_lod = out->mutable_lod();
    out_lod->clear();
    size_t out_offset = 0;

    // Build LoDTensor `out`

    size_t in_true_idx = 0;
    size_t in_false_idx = 0;
    for (size_t i = 0; i < static_cast<size_t>(mask_dim[0]); i++) {
      const framework::LoDTensor *input = nullptr;
      size_t *in_idx = nullptr;
      if (static_cast<int>(mask_data[i]) == 0) {
        input = &in_false;
        in_idx = &in_false_idx;
      } else {
        input = &in_true;
        in_idx = &in_true_idx;
      }
      auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
          input->lod(), *in_idx, (*in_idx) + 1, 0);
      auto &lod_length = lod_and_offset.first;

      framework::AppendLoD(out_lod, lod_length);

      size_t start_offset = lod_and_offset.second.first;
      size_t end_offset = lod_and_offset.second.second;

      PADDLE_ENFORCE_GE(end_offset, start_offset);
      size_t len = end_offset - start_offset;
      if (len == 0) {
        continue;
      }
      out->Slice(out_offset, out_offset + len)
          .CopyFrom(input->Slice(start_offset, end_offset), place, dev_ctx);
      out_offset += len;
      (*in_idx) += 1;
    }

    for (size_t i = 0; i < level; i++) {
      out_lod->insert(out_lod->begin(), x.lod()[i]);
    }
  }
};

class MergeLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  MergeLoDTensorOpProtoMaker(framework::OpProto *proto,
                             framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X",
             "The input LoDTensor, contains complete lod information to "
             "construct the output");
    AddInput("Mask", "A bool column vector which masks the input");
    AddInput("InTrue", "The True branch to be merged");
    AddInput("InFalse", "The False branch to be merged");
    AddOutput("Out", "The merged output LoDTensor");
    AddAttr<int>("level", "(int) the specific lod level to merge.")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddComment(
        R"DOC(
        Merge the True and False branches of a LoDTensor into a single output,
        using a mask at a certain lod level. X is used to obtain the complete
        lod information. Please refer to SplitLoDTensorOp.)DOC");
  }
};

class MergeLoDTensorInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("X"),
                   "MergeLoDTensorOp must have input X.");
    PADDLE_ENFORCE(context->HasInput("Mask"),
                   "MergeLoDTensorOp must have input Mask.");
    PADDLE_ENFORCE(context->HasInput("InTrue"),
                   "MergeLoDTensorOp must have input InTrue.");
    PADDLE_ENFORCE(context->HasInput("InFalse"),
                   "MergeLoDTensorOp must have input InFalse.");
    PADDLE_ENFORCE(context->HasOutput("Out"),
                   "MergeLoDTensorOp must have output Out.");

    auto mask_dim = context->GetInputDim("Mask");
    PADDLE_ENFORCE_EQ(mask_dim.size(), 2);
    PADDLE_ENFORCE_EQ(mask_dim[1], 1);

    context->SetOutputDim("Out", context->GetInputDim("InTrue"));
  }
};

class MergeLoDTensorGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    auto *grad_op = new framework::OpDescBind();
    grad_op->SetType("split_lod_tensor");
    grad_op->SetInput("X", OutputGrad("Out"));
    grad_op->SetInput("Mask", Input("Mask"));
    grad_op->SetOutput("OutTrue", InputGrad("InTrue"));
    grad_op->SetOutput("OutFalse", InputGrad("InFalse"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDescBind>(grad_op);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(merge_lod_tensor, ops::MergeLoDTensorOp,
                  ops::MergeLoDTensorOpProtoMaker,
                  ops::MergeLoDTensorInferShape, ops::MergeLoDTensorGradMaker);
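The Run method above walks the mask row by row, pulling the next whole sequence from InFalse when the mask entry is 0 and from InTrue when it is 1, and rebuilds the output lod as it goes. A pure-numpy sketch of that level-0 bookkeeping (merge_by_mask is a hypothetical helper for illustration only; the real operator also handles deeper lod levels through GetSubLoDAndAbsoluteOffset and prepends the upper levels of X's lod):

    import numpy as np

    def merge_by_mask(mask, in_true, in_false, true_lod, false_lod):
        """Level-0 sketch: for each mask entry, copy the next whole sequence
        from in_true (mask == 1) or in_false (mask == 0) and extend the lod."""
        out_rows, out_lod, ti, fi = [], [0], 0, 0
        for m in mask:
            if m:
                begin, end = true_lod[ti], true_lod[ti + 1]
                out_rows.append(in_true[begin:end])
                ti += 1
            else:
                begin, end = false_lod[fi], false_lod[fi + 1]
                out_rows.append(in_false[begin:end])
                fi += 1
            out_lod.append(out_lod[-1] + (end - begin))
        return np.concatenate(out_rows), out_lod

    # Mirrors the level-0 test case added below.
    out, lod = merge_by_mask(
        mask=[0, 1, 0],
        in_true=np.arange(3, 9), true_lod=[0, 6],
        in_false=np.array([0, 1, 2, 9]), false_lod=[0, 3, 4])
    print(out)  # [0 1 2 3 4 5 6 7 8 9]
    print(lod)  # [0, 3, 9, 10]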
@@ -0,0 +1,186 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/framework/op_registry.h"
#include "paddle/memory/memcpy.h"

namespace paddle {
namespace operators {

struct CopyRange {
  size_t begin;
  size_t end;
};

using LoD = framework::LoD;

class SplitLoDTensorOp : public framework::OperatorBase {
 public:
  SplitLoDTensorOp(const std::string &type,
                   const framework::VariableNameMap &inputs,
                   const framework::VariableNameMap &outputs,
                   const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::DeviceContext &dev_ctx) const override {
    auto &x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto &mask = scope.FindVar(Input("Mask"))->Get<framework::LoDTensor>();
    auto *out_true =
        scope.FindVar(Output("OutTrue"))->GetMutable<framework::LoDTensor>();
    auto *out_false =
        scope.FindVar(Output("OutFalse"))->GetMutable<framework::LoDTensor>();
    auto level = static_cast<size_t>(Attr<int>("level"));
    auto &x_lod = x.lod();
    auto &mask_dim = mask.dims();

    std::unique_ptr<framework::LoDTensor> cpu_mask{new framework::LoDTensor()};
    if (platform::is_cpu_place(mask.place())) {
      cpu_mask->ShareDataWith(mask);
    } else if (platform::is_gpu_place(mask.place())) {
#ifdef PADDLE_WITH_CUDA
      cpu_mask->CopyFrom(mask, platform::CPUPlace(), dev_ctx);
#else
      PADDLE_THROW("GPU is not supported; please compile with the WITH_GPU option");
#endif
    }
    auto *mask_data = cpu_mask->data<bool>();

    std::vector<std::vector<CopyRange>> copy_ranges(mask_dim[0]);

    // set out_true/out_false lod
    for (size_t t = 0; t < 2; t++) {
      LoD *lod = nullptr;
      if (t == 0) {
        lod = out_false->mutable_lod();
      } else {
        lod = out_true->mutable_lod();
      }
      lod->clear();
      for (size_t i = 0; i < static_cast<size_t>(mask_dim[0]); i++) {
        if (static_cast<size_t>(mask_data[i]) == t) {
          size_t start_idx = i;
          auto lod_and_offset = framework::GetSubLoDAndAbsoluteOffset(
              x_lod, start_idx, start_idx + 1, level);

          auto &lod_length = lod_and_offset.first;
          framework::AppendLoD(lod, lod_length);

          size_t start_offset = lod_and_offset.second.first;
          size_t end_offset = lod_and_offset.second.second;
          copy_ranges[t].emplace_back(CopyRange{start_offset, end_offset});
        }
      }
    }

    for (size_t t = 0; t < 2; ++t) {
      framework::LoDTensor *out;
      if (t == 0) {
        out = out_false;
      } else {
        out = out_true;
      }
      auto &ranges = copy_ranges[t];
      size_t height = std::accumulate(
          ranges.begin(), ranges.end(), 0UL,
          [](size_t a, const CopyRange &b) { return a + b.end - b.begin; });
      auto x_dim = x.dims();
      x_dim[0] = static_cast<int64_t>(height);
      out->Resize(x_dim);
      out->mutable_data(x.place(), x.type());
      size_t offset = 0;
      for (auto &each_range : ranges) {
        size_t len = each_range.end - each_range.begin;
        if (len == 0) {
          continue;
        }
        // out[offset: offset+len] = x[each_range.begin: each_range.end]
        out->Slice(static_cast<int>(offset), static_cast<int>(offset + len))
            .CopyFrom(x.Slice(static_cast<int>(each_range.begin),
                              static_cast<int>(each_range.end)),
                      x.place(), dev_ctx);
        offset += len;
      }
    }
  }
};

class SplitLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SplitLoDTensorOpProtoMaker(framework::OpProto *proto,
                             framework::OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput("X", "The input LoDTensor");
    AddInput("Mask", "A bool column vector which masks the input");
    AddOutput("OutTrue", "True branch of input LoDTensor");
    AddOutput("OutFalse", "False branch of input LoDTensor");
    AddAttr<int>("level", "(int) the specific lod level to split.")
        .SetDefault(0)
        .EqualGreaterThan(0);
    AddComment(
        R"DOC(
        Split a LoDTensor with a Mask at a certain lod level. For example,
        if the input LoDTensor has 3 sequences at the given level and the
        Mask is the bool column vector [0, 1, 0] at the same level, the
        first and third sequences are sent to the False output LoDTensor,
        while the second sequence is sent to the True output LoDTensor.
        Please refer to MergeLoDTensorOp.)DOC");
  }
};

class SplitLoDTensorInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("X"),
                   "SplitLoDTensorOp must have input X.");
    PADDLE_ENFORCE(context->HasInput("Mask"),
                   "SplitLoDTensorOp must have input Mask.");
    PADDLE_ENFORCE(context->HasOutput("OutTrue"),
                   "SplitLoDTensorOp must have output OutTrue.");
    PADDLE_ENFORCE(context->HasOutput("OutFalse"),
                   "SplitLoDTensorOp must have output OutFalse.");

    auto mask_dim = context->GetInputDim("Mask");
    PADDLE_ENFORCE_EQ(mask_dim.size(), 2);
    PADDLE_ENFORCE_EQ(mask_dim[1], 1);

    context->SetOutputDim("OutTrue", context->GetInputDim("X"));
    context->SetOutputDim("OutFalse", context->GetInputDim("X"));
  }
};

class SplitLoDTensorArrayGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDescBind> Apply() const override {
    auto *grad_op = new framework::OpDescBind();
    grad_op->SetType("merge_lod_tensor");
    grad_op->SetInput("InTrue", OutputGrad("OutTrue"));
    grad_op->SetInput("InFalse", OutputGrad("OutFalse"));
    grad_op->SetInput("Mask", Input("Mask"));
    grad_op->SetInput("X", Input("X"));
    grad_op->SetOutput("Out", InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDescBind>(grad_op);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(split_lod_tensor, ops::SplitLoDTensorOp,
                  ops::SplitLoDTensorOpProtoMaker,
                  ops::SplitLoDTensorInferShape,
                  ops::SplitLoDTensorArrayGradMaker);
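The split is the mirror image of the merge: sequence i of X (the rows between x_lod[i] and x_lod[i+1] at the chosen level) is copied to OutTrue when Mask[i] is set and to OutFalse otherwise, and each output gets its own rebuilt lod. A pure-numpy sketch of that level-0 logic (split_by_mask is a hypothetical helper, and it assumes both branches receive at least one sequence):

    import numpy as np

    def split_by_mask(x, x_lod, mask):
        """Level-0 sketch: route whole sequences of x to the True or False
        branch according to the mask, rebuilding a lod for each branch."""
        rows = {True: [], False: []}
        lods = {True: [0], False: [0]}
        for i, m in enumerate(mask):
            branch = bool(m)
            begin, end = x_lod[i], x_lod[i + 1]
            rows[branch].append(x[begin:end])
            lods[branch].append(lods[branch][-1] + (end - begin))
        return (np.concatenate(rows[True]), lods[True]), \
               (np.concatenate(rows[False]), lods[False])

    # Mirrors the level-0 test case added below.
    x = np.arange(10)
    (true_t, true_lod), (false_t, false_lod) = split_by_mask(
        x, x_lod=[0, 3, 9, 10], mask=[0, 1, 0])
    print(true_t, true_lod)    # [3 4 5 6 7 8] [0, 6]
    print(false_t, false_lod)  # [0 1 2 9] [0, 3, 4]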
@@ -0,0 +1,181 @@
import unittest
import paddle.v2.framework.core as core
import numpy as np
import paddle.v2.framework.layers as layers
from paddle.v2.framework.framework import Program
from paddle.v2.framework.executor import Executor
from paddle.v2.framework.backward import append_backward_ops


class TestCPULoDTensorArrayOps(unittest.TestCase):
    def place(self):
        return core.CPUPlace()

    def test_split_and_merge_lod_tensor_no_lod(self):
        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())

        mask_np = np.array([0, 0, 1, 1, 1, 1, 0, 0, 0, 0]).astype('bool')
        mask_np = np.expand_dims(mask_np, axis=1)

        mask = core.LoDTensor()
        mask.set(mask_np, self.place())

        expect_true_tensor = np.array([2, 3, 4, 5]).astype('int32')
        expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1)
        expect_true = core.LoDTensor()
        expect_true.set(expect_true_tensor, self.place())

        expect_false_tensor = np.array([0, 1, 6, 7, 8, 9]).astype('int32')
        expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1)

        expect_false = core.LoDTensor()
        expect_false.set(expect_false_tensor, self.place())

        self.main(
            tensor=tensor,
            mask=mask,
            expect_true=expect_true,
            expect_false=expect_false,
            expect_out=tensor)

    def test_split_and_merge_lod_tensor_level_0(self):
        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())
        tensor.set_lod([[0, 3, 9, 10]])

        mask_np = np.array([0, 1, 0]).astype('bool')
        mask_np = np.expand_dims(mask_np, axis=1)

        mask = core.LoDTensor()
        mask.set(mask_np, self.place())

        expect_true_tensor = np.array([3, 4, 5, 6, 7, 8]).astype('int32')
        expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1)
        expect_true = core.LoDTensor()
        expect_true.set(expect_true_tensor, self.place())
        expect_true.set_lod([[0, 6]])

        expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32')
        expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1)
        expect_false_lod = [[0, 3, 4]]

        expect_false = core.LoDTensor()
        expect_false.set(expect_false_tensor, self.place())
        expect_false.set_lod(expect_false_lod)

        self.main(
            tensor=tensor,
            mask=mask,
            expect_true=expect_true,
            expect_false=expect_false,
            expect_out=tensor)

    def main(self, tensor, mask, expect_true, expect_false, expect_out,
             level=0):
        place = self.place()
        program = Program()
        x = layers.data(name='x', shape=[1], main_program=program)
        x.persistable = True

        y = layers.data(name='y', shape=[1], main_program=program)
        y.persistable = True

        out_true, out_false = layers.split_lod_tensor(
            input=x, mask=y, level=level, main_program=program)
        out_true.persistable = True
        out_false.persistable = True

        out = layers.merge_lod_tensor(
            in_true=out_true,
            in_false=out_false,
            mask=y,
            x=x,
            level=level,
            main_program=program)

        out.persistable = True

        exe = Executor(place)
        scope = core.Scope()
        exe.run(program, feed={'x': tensor, 'y': mask}, scope=scope)

        var_true = scope.find_var(out_true.name).get_tensor()

        var_false = scope.find_var(out_false.name).get_tensor()

        var_out = scope.find_var(out.name).get_tensor()

        self.check_tensor_same(var_true, expect_true)
        self.check_tensor_same(var_false, expect_false)
        self.check_tensor_same(var_out, expect_out)

    def check_tensor_same(self, actual, expect):
        self.assertTrue(np.allclose(np.array(actual), np.array(expect)))
        self.assertEqual(actual.lod(), expect.lod())


class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
    def test_grad(self):
        place = core.CPUPlace()
        program = Program()

        x = layers.data(
            name='x',
            shape=[1],
            data_type='float32',
            main_program=program,
            stop_gradient=False)
        y = layers.data(
            name='y',
            shape=[1],
            data_type='bool',
            main_program=program,
            stop_gradient=False)

        level = 0

        out_true, out_false = layers.split_lod_tensor(
            input=x, mask=y, level=level, main_program=program)
        out = layers.merge_lod_tensor(
            in_true=out_true,
            in_false=out_false,
            mask=y,
            x=x,
            level=level,
            main_program=program)
        mean = layers.mean(x=out, main_program=program)

        append_backward_ops(mean)

        tensor = core.LoDTensor()
        tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
        tensor.set_lod([[0, 3, 9, 10]])

        mask_np = np.array([0, 1, 0]).astype('bool')
        mask_np = np.expand_dims(mask_np, axis=1)

        mask = core.LoDTensor()
        mask.set(mask_np, place)

        exe = Executor(place)
        scope = core.Scope()

        g_vars = program.global_block().var(x.name + "@GRAD")
        g_out = [
            item.sum()
            for item in map(np.array,
                            exe.run(program,
                                    feed={'x': tensor,
                                          'y': mask},
                                    fetch_list=[g_vars],
                                    scope=scope))
        ]

        g_out_sum = np.array(g_out).sum()

        self.assertAlmostEqual(1.0, g_out_sum, delta=0.1)


if __name__ == '__main__':
    unittest.main()
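A note on the value asserted in TestCPUSplitMergeLoDTensorGrad: split followed by merge only rearranges rows back into their original order, so out equals x, mean(out) = sum(x) / 10, and each of the ten elements receives a gradient of 1/10; the summed gradient is therefore 1.0, which is what the test checks (within delta=0.1). A quick numpy check of that arithmetic:

    import numpy as np

    # d(mean)/dx_i = 1/10 for every element of the (10, 1) input,
    # so the gradients sum to 1.0, matching the assertAlmostEqual above.
    grad = np.full((10, 1), 1.0 / 10)
    print(grad.sum())  # ~1.0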