Fix clip.py (#14718)
* expose square test=develop
* fix activation test=develop
* Add square API test=develop
* add necessary op
* code refine
* fix API.spec test=develop
* fix unit test test=develop
* add unit test sparse_grad_clip test=develop
* fix API.spec test=develop
* remove mac test for test_gradient_clip test=develop
* remove selectedrows_mul_tensor test=develop

parent 052cc5f538
commit 04539d4c5d
@@ -0,0 +1,117 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"

namespace paddle {
namespace operators {

class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "GetTensorFromSelectedRowsOp must have input X.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "GetTensorFromSelectedRowsOp must have output Out.");
    PADDLE_ENFORCE(
        ctx->GetInputsVarType("X").front() ==
            framework::proto::VarType::SELECTED_ROWS,
        "The input X(%s)'s type should be SelectedRows, but the received is "
        "%s",
        ctx->Inputs("X").front(), ctx->GetInputsVarType("X").front());
    PADDLE_ENFORCE(
        ctx->GetOutputsVarType("Out").front() ==
            framework::proto::VarType::LOD_TENSOR,
        "The output Out(%s)'s type should be LoDTensor, but the received is "
        "%s",
        ctx->Outputs("Out").front(), ctx->GetOutputsVarType("Out").front());

    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::GetDataTypeOfVar(ctx.InputVar("X")), ctx.device_context());
  }
};

class GetTensorFromSelectedRowsKernel {
 public:
  void operator()(const framework::ExecutionContext &ctx) const {
    auto *x = ctx.Input<framework::SelectedRows>("X");
    auto *out = ctx.Output<framework::LoDTensor>("Out");

    out->Resize(x->value().dims());
    out->mutable_data(ctx.GetPlace(), x->value().type());
    framework::TensorCopy(x->value(), ctx.GetPlace(), ctx.device_context(),
                          out);
  }
};

class GetTensorFromSelectedRowsOpProtoMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input type is SelectedRows.");
    AddOutput("Out", "The output type is LoDTensor.");
    AddComment(
        R"DOC(
GetTensorFromSelectedRows Operator

GetTensorFromSelectedRows is used to get the tensor from SelectedRows.

)DOC");
  }
};

class GetTensorFromSelectedRowsOpVarTypeInference
    : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc &op_desc,
                  framework::BlockDesc *block) const final {
    auto out_var_name = op_desc.Output("Out").front();
    auto in_var_name = op_desc.Input("X").front();

    // Bind references so the updates below mutate the block's VarDescs
    // rather than discarded copies.
    auto &out_var = block->FindRecursiveOrCreateVar(out_var_name);
    auto &in_var = block->FindRecursiveOrCreateVar(in_var_name);
    out_var.SetType(framework::proto::VarType::LOD_TENSOR);
    out_var.SetDataType(in_var.GetDataType());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(get_tensor_from_selected_rows,
                  ops::GetTensorFromSelectedRowsOp,
                  ops::GetTensorFromSelectedRowsOpProtoMaker,
                  ops::GetTensorFromSelectedRowsOpVarTypeInference);

REGISTER_OP_CPU_KERNEL_FUNCTOR(get_tensor_from_selected_rows, float,
                               ops::GetTensorFromSelectedRowsKernel, double,
                               ops::GetTensorFromSelectedRowsKernel, int,
                               ops::GetTensorFromSelectedRowsKernel, int64_t,
                               ops::GetTensorFromSelectedRowsKernel);

#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(get_tensor_from_selected_rows, float,
                                ops::GetTensorFromSelectedRowsKernel, double,
                                ops::GetTensorFromSelectedRowsKernel, int,
                                ops::GetTensorFromSelectedRowsKernel, int64_t,
                                ops::GetTensorFromSelectedRowsKernel);
#endif
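Note on semantics: the kernel above only strips the SelectedRows wrapper. The output LoDTensor is a straight TensorCopy of x->value(), one row per entry in x->rows() with duplicates preserved; the row ids and the logical height are dropped. A rough NumPy picture of that layout (a sketch with made-up names, not the Paddle API):

import numpy as np

# A SelectedRows holds (rows, value, height): row ids into a virtual
# [height, row_numel] tensor plus dense values for just those rows.
rows = [0, 5, 5, 4, 20]                      # ids; duplicates are allowed
value = np.ones((len(rows), 2), "float32")   # one dense row per id

# get_tensor_from_selected_rows copies `value` out unchanged, so the
# result has shape (len(rows), row_numel); ids and height are dropped.
out = value.copy()
assert out.shape == (5, 2)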
@@ -0,0 +1,72 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/merge_selected_rows_op.h"

namespace paddle {
namespace operators {

class MergeSelectedRowsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of MergeSelectedRowsOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of MergeSelectedRowsOp should not be null.");
    ctx->ShareDim("X", /*->*/ "Out");
  }
};

class MergeSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "The input type is SelectedRows, and the selected rows may be "
             "duplicated.");
    AddOutput("Out",
              "The output type is SelectedRows, and the selected rows are not "
              "duplicated.");
    AddComment(
        R"DOC(
MergeSelectedRows Operator.

MergeSelectedRows is used to merge the duplicated rows of the input.
)DOC");
  }
};

class MergeSelectedRowsOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Out"}};
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(merge_selected_rows, ops::MergeSelectedRowsOp,
                  ops::MergeSelectedRowsOpMaker,
                  ops::MergeSelectedRowsOpInferVarType);

REGISTER_OP_CPU_KERNEL(
    merge_selected_rows,
    ops::MergeSelectedRowsKernel<plat::CPUDeviceContext, float>,
    ops::MergeSelectedRowsKernel<plat::CPUDeviceContext, double>);
@@ -0,0 +1,23 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/merge_selected_rows_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(
    merge_selected_rows,
    ops::MergeSelectedRowsKernel<plat::CUDADeviceContext, float>,
    ops::MergeSelectedRowsKernel<plat::CUDADeviceContext, double>);
@@ -0,0 +1,36 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class MergeSelectedRowsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<framework::SelectedRows>("X");
    auto* out = context.Output<framework::SelectedRows>("Out");

    math::scatter::MergeAdd<DeviceContext, T> merge_func;
    merge_func(context.template device_context<DeviceContext>(), *x, out);
  }
};

}  // namespace operators
}  // namespace paddle
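MergeAdd, used by the kernel above, sums the values of duplicated row ids and emits one entry per unique id in ascending order; this is what turns a sparse gradient with repeated rows into a well-formed SelectedRows before clipping. A minimal NumPy sketch of that semantics (illustrative only, not the Paddle implementation; merge_add is a made-up name):

import numpy as np

def merge_add(rows, values):
    # Collapse duplicated row ids; each surviving row is the sum of
    # all value rows that carried that id, with ids sorted ascending.
    unique_rows = sorted(set(rows))
    merged = np.zeros((len(unique_rows), values.shape[1]), values.dtype)
    for row, value in zip(rows, values):
        merged[unique_rows.index(row)] += value
    return unique_rows, merged

# Same data as test_merge_selectedrows_op below: id 5 appears twice,
# so its merged value is 2.0 + 3.0 = 5.0.
vals = np.ones((5, 2), dtype="float32")
vals[1, :], vals[2, :], vals[3, :] = 2.0, 3.0, 4.0
rows, merged = merge_add([0, 5, 5, 4, 20], vals)
assert rows == [0, 4, 5, 20]
assert (merged[2, :] == 5.0).all()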
@@ -1,84 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 128
CLIP = 1

prog = fluid.framework.Program()
with fluid.program_guard(main_program=prog):
    image = fluid.layers.data(name='x', shape=[784], dtype='float32')

    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

    label = fluid.layers.data(name='y', shape=[1], dtype='int64')

    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)

prog_clip = prog.clone()

avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

p_g = fluid.backward.append_backward(loss=avg_cost)
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

with fluid.program_guard(main_program=prog_clip):
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)
    global_norm = 0
    for v in out[1:]:
        global_norm += np.sum(np.power(v, 2))
    global_norm = np.sqrt(global_norm)

    global_norm_clip = 0
    for v in out_clip[1:]:
        global_norm_clip += np.sum(np.power(v, 2))
    global_norm_clip = np.sqrt(global_norm_clip)

    if not np.isclose(
            a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3):
        exit(1)
exit(0)
@@ -0,0 +1,65 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid.core as core
import numpy as np
from paddle.fluid.op import Operator


class TestGetTensorFromSelectedRows(unittest.TestCase):
    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_with_place(self, place):
        scope = core.Scope()
        x_rows = [0, 5, 5, 4, 20]
        height = 20
        row_numel = 2

        np_array = np.ones((len(x_rows), row_numel)).astype("float32")
        np_array[1, :] = 2.0
        np_array[2, :] = 3.0
        np_array[3, :] = 4.0

        # initialize input variable X
        x = scope.var('X').get_selected_rows()
        x.set_rows(x_rows)
        x.set_height(height)
        x_tensor = x.get_tensor()
        x_tensor.set(np_array, place)

        # initialize output variable Out
        out = scope.var("Out").get_tensor()

        op = Operator("get_tensor_from_selected_rows", X="X", Out="Out")

        op.run(scope, place)

        out_array = np.array(out)
        self.assertEqual((5, 2), out_array.shape)
        assert (out_array == np_array).all()

    def test_check_output(self):
        for place in self.get_places():
            self.check_with_place(place)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,162 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid

BATCH_SIZE = 128
CLIP = 1


def bow_net(data,
            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=True, size=[dict_dim, emb_dim])
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    return avg_cost


class TestGradientClip(unittest.TestCase):
    def setUp(self):
        self.word_dict = paddle.dataset.imdb.word_dict()
        self.BATCH_SIZE = 2
        self.train_data = paddle.batch(
            paddle.dataset.imdb.train(self.word_dict),
            batch_size=self.BATCH_SIZE)

    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_operators(self, place):
        prog = fluid.framework.Program()
        startup_program = fluid.framework.Program()
        with fluid.program_guard(
                main_program=prog, startup_program=startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')

            hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
            hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)

        prog_clip = prog.clone()

        avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

        p_g = fluid.backward.append_backward(loss=avg_cost)
        p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

        with fluid.program_guard(main_program=prog_clip):
            fluid.clip.set_gradient_clip(
                fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
            p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

        grad_list = [elem[1] for elem in p_g]
        grad_clip_list = [elem[1] for elem in p_g_clip]

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=BATCH_SIZE)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
        exe.run(startup_program)

        count = 0
        for data in train_reader():
            count += 1
            if count > 5:
                break
            out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
            out_clip = exe.run(prog_clip,
                               feed=feeder.feed(data),
                               fetch_list=grad_clip_list)
            global_norm = 0
            for v in out[1:]:
                global_norm += np.sum(np.power(v, 2))
            global_norm = np.sqrt(global_norm)

            global_norm_clip = 0
            for v in out_clip[1:]:
                global_norm_clip += np.sum(np.power(v, 2))
            global_norm_clip = np.sqrt(global_norm_clip)

            assert np.isclose(
                a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3)

    def check_sparse_gradient_clip(self, place):
        prog = fluid.framework.Program()
        startup_program = fluid.framework.Program()
        with fluid.program_guard(
                main_program=prog, startup_program=startup_program):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            cost = bow_net(data, label, len(self.word_dict))

            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))

            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
            sgd_optimizer.minimize(cost)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
        exe.run(startup_program)

        data = next(self.train_data())
        val = exe.run(prog, feed=feeder.feed(data), fetch_list=[cost])[0]
        self.assertEqual((1, ), val.shape)
        print(val)
        self.assertFalse(np.isnan(val))

    def test_operators(self):
        self.check_operators(core.CPUPlace())

    def test_sparse_gradient_clip(self):
        for place in self.get_places():
            self.check_sparse_gradient_clip(place)


if __name__ == '__main__':
    unittest.main()
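The assertion in check_operators above follows from how clipping by global norm behaves: every gradient is scaled by the same factor clip_norm / max(global_norm, clip_norm), so the post-clip global norm equals min(global_norm, clip_norm), which is exactly what the test compares against. A minimal NumPy sketch of that rule (a sketch of the math, not the fluid implementation):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # One shared scale factor: shrink everything only when the global
    # norm exceeds clip_norm, leaving gradient directions unchanged.
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads]

grads = [np.array([3.0, 4.0]), np.array([12.0])]   # global norm = 13
clipped = clip_by_global_norm(grads, clip_norm=1.0)
clipped_norm = np.sqrt(sum(np.sum(np.square(g)) for g in clipped))
assert np.isclose(clipped_norm, min(13.0, 1.0))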
@@ -0,0 +1,73 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid.core as core
import numpy as np
from paddle.fluid.op import Operator


class TestMergeSelectedRows(unittest.TestCase):
    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_with_place(self, place):
        scope = core.Scope()
        x_rows = [0, 5, 5, 4, 20]
        out_rows = [0, 4, 5, 20]
        height = 20
        row_numel = 2

        np_array = np.ones((len(x_rows), row_numel)).astype("float32")
        np_array[1, :] = 2.0
        np_array[2, :] = 3.0
        np_array[3, :] = 4.0

        # initialize input variable X
        x = scope.var('X').get_selected_rows()
        x.set_rows(x_rows)
        x.set_height(height)
        x_tensor = x.get_tensor()
        x_tensor.set(np_array, place)

        # initialize output variable Out
        out = scope.var("Out").get_selected_rows()

        op = Operator("merge_selected_rows", X="X", Out="Out")

        op.run(scope, place)

        self.assertEqual(out.rows(), out_rows)
        self.assertEqual(out.height(), height)

        out_array = np.array(out.get_tensor())
        self.assertEqual((4, 2), out_array.shape)

        assert (out_array[0, :] == 1.0).all()
        assert (out_array[1, :] == 4.0).all()
        assert (out_array[2, :] == 5.0).all()
        assert (out_array[3, :] == 1.0).all()

    def test_check_output(self):
        for place in self.get_places():
            self.check_with_place(place)


if __name__ == "__main__":
    unittest.main()