Fix clip.py (#14718)
* expose square test=develop
* fix activation test=develop
* Add square API test=develop
* add necessary op
* code refine
* fix API.spec test=develop
* fix unit test test=develop
* add unit test sparse_grad_clip test=develop
* fix API.spec test=develop
* remove mac test for test_gradient_clip test=develop
* remove selectedrows_mul_tensor test=develop

revert-14398-imperative
parent 052cc5f538, commit 04539d4c5d
@ -0,0 +1,117 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"

namespace paddle {
namespace operators {

class GetTensorFromSelectedRowsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "GetTensorFromSelectedRowsOp must have input X.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "GetTensorFromSelectedRowsOp must have output Out.");
    PADDLE_ENFORCE(
        ctx->GetInputsVarType("X").front() ==
            framework::proto::VarType::SELECTED_ROWS,
        "The type of input X(%s) should be SelectedRows, but the received "
        "type is %s.",
        ctx->Inputs("X").front(), ctx->GetInputsVarType("X").front());
    PADDLE_ENFORCE(
        ctx->GetOutputsVarType("Out").front() ==
            framework::proto::VarType::LOD_TENSOR,
        "The type of output Out(%s) should be LoDTensor, but the received "
        "type is %s.",
        ctx->Outputs("Out").front(), ctx->GetOutputsVarType("Out").front());

    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::GetDataTypeOfVar(ctx.InputVar("X")), ctx.device_context());
  }
};

class GetTensorFromSelectedRowsKernel {
 public:
  void operator()(const framework::ExecutionContext &ctx) const {
    auto *x = ctx.Input<framework::SelectedRows>("X");
    auto *out = ctx.Output<framework::LoDTensor>("Out");

    // Copy the dense value tensor held by the SelectedRows into Out.
    out->Resize(x->value().dims());
    out->mutable_data(ctx.GetPlace(), x->value().type());
    framework::TensorCopy(x->value(), ctx.GetPlace(), ctx.device_context(),
                          out);
  }
};

class GetTensorFromSelectedRowsOpProtoMaker
    : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X", "The input type is SelectedRows.");
    AddOutput("Out", "The output type is LoDTensor.");
    AddComment(
        R"DOC(
GetTensorFromSelectedRows Operator

GetTensorFromSelectedRows is used to get the tensor from SelectedRows.

)DOC");
  }
};

class GetTensorFromSelectedRowsOpVarTypeInference
    : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc &op_desc,
                  framework::BlockDesc *block) const final {
    auto out_var_name = op_desc.Output("Out").front();
    auto in_var_name = op_desc.Input("X").front();

    // Bind references so the type/dtype updates reach the block's VarDescs
    // rather than local copies.
    auto &out_var = block->FindRecursiveOrCreateVar(out_var_name);
    auto &in_var = block->FindRecursiveOrCreateVar(in_var_name);
    out_var.SetType(framework::proto::VarType::LOD_TENSOR);
    out_var.SetDataType(in_var.GetDataType());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(get_tensor_from_selected_rows,
                  ops::GetTensorFromSelectedRowsOp,
                  ops::GetTensorFromSelectedRowsOpProtoMaker,
                  ops::GetTensorFromSelectedRowsOpVarTypeInference);

REGISTER_OP_CPU_KERNEL_FUNCTOR(get_tensor_from_selected_rows, float,
                               ops::GetTensorFromSelectedRowsKernel, double,
                               ops::GetTensorFromSelectedRowsKernel, int,
                               ops::GetTensorFromSelectedRowsKernel, int64_t,
                               ops::GetTensorFromSelectedRowsKernel);

#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(get_tensor_from_selected_rows, float,
                                ops::GetTensorFromSelectedRowsKernel, double,
                                ops::GetTensorFromSelectedRowsKernel, int,
                                ops::GetTensorFromSelectedRowsKernel, int64_t,
                                ops::GetTensorFromSelectedRowsKernel);
#endif
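A SelectedRows variable pairs a dense value tensor with a list of row indices into a taller conceptual tensor, which is how fluid represents sparse gradients (e.g. from is_sparse embeddings). The kernel above simply copies that dense payload out as a LoDTensor. A minimal NumPy sketch of the semantics, reusing the data from the unit test later in this diff (variable names here are illustrative only):

import numpy as np

# A SelectedRows: 5 stored rows of a height-20 conceptual tensor.
rows = [0, 5, 5, 4, 20]  # row indices; note they may repeat
value = np.ones((len(rows), 2), dtype=np.float32)  # dense payload

# get_tensor_from_selected_rows: Out is the dense payload, copied as-is.
out = value.copy()
assert out.shape == (5, 2)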
@ -0,0 +1,72 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/merge_selected_rows_op.h"

namespace paddle {
namespace operators {

class MergeSelectedRowsOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of MergeSelectedRowsOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of MergeSelectedRowsOp should not be null.");
    ctx->ShareDim("X", /*->*/ "Out");
  }
};

class MergeSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("X",
             "The input type is SelectedRows, and the selected rows may be "
             "duplicated.");
    AddOutput("Out",
              "The output type is SelectedRows, and the selected rows are not "
              "duplicated.");
    AddComment(
        R"DOC(
MergeSelectedRows Operator.

MergeSelectedRows is used to merge the duplicated rows of the input.
)DOC");
  }
};

class MergeSelectedRowsOpInferVarType
    : public framework::PassInDtypeAndVarTypeToOutput {
 protected:
  std::unordered_map<std::string, std::string> GetInputOutputWithSameType()
      const override {
    return std::unordered_map<std::string, std::string>{{"X", /*->*/ "Out"}};
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OPERATOR(merge_selected_rows, ops::MergeSelectedRowsOp,
                  ops::MergeSelectedRowsOpMaker,
                  ops::MergeSelectedRowsOpInferVarType);

REGISTER_OP_CPU_KERNEL(
    merge_selected_rows,
    ops::MergeSelectedRowsKernel<plat::CPUDeviceContext, float>,
    ops::MergeSelectedRowsKernel<plat::CPUDeviceContext, double>);
@ -0,0 +1,23 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/merge_selected_rows_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(
    merge_selected_rows,
    ops::MergeSelectedRowsKernel<plat::CUDADeviceContext, float>,
    ops::MergeSelectedRowsKernel<plat::CUDADeviceContext, double>);
@ -0,0 +1,36 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"

namespace paddle {
namespace operators {

template <typename DeviceContext, typename T>
class MergeSelectedRowsKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<framework::SelectedRows>("X");
    auto* out = context.Output<framework::SelectedRows>("Out");

    // MergeAdd sums up the value rows that share a row index, so each
    // row index appears at most once in the output SelectedRows.
    math::scatter::MergeAdd<DeviceContext, T> merge_func;
    merge_func(context.template device_context<DeviceContext>(), *x, out);
  }
};

}  // namespace operators
}  // namespace paddle
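The kernel delegates the real work to math::scatter::MergeAdd, which adds up value rows that share a row index so that each index appears only once in the output. A NumPy sketch of that behavior, mirroring the data and expected results of test_merge_selectedrows_op below (a sketch of the semantics, not the functor's implementation):

import numpy as np

x_rows = [0, 5, 5, 4, 20]
x_value = np.ones((len(x_rows), 2), dtype=np.float32)
x_value[1, :], x_value[2, :], x_value[3, :] = 2.0, 3.0, 4.0

out_rows = sorted(set(x_rows))  # [0, 4, 5, 20]
out_value = np.zeros((len(out_rows), 2), dtype=np.float32)
for r, v in zip(x_rows, x_value):
    out_value[out_rows.index(r)] += v  # row 5 accumulates 2.0 + 3.0 = 5.0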
@ -1,84 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 128
CLIP = 1

prog = fluid.framework.Program()
with fluid.program_guard(main_program=prog):
    image = fluid.layers.data(name='x', shape=[784], dtype='float32')

    hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
    hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
    predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

    label = fluid.layers.data(name='y', shape=[1], dtype='int64')

    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)

prog_clip = prog.clone()

avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

p_g = fluid.backward.append_backward(loss=avg_cost)
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

with fluid.program_guard(main_program=prog_clip):
    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
    p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

grad_list = [elem[1] for elem in p_g]
grad_clip_list = [elem[1] for elem in p_g_clip]

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.mnist.train(), buf_size=8192),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
    out_clip = exe.run(prog_clip,
                       feed=feeder.feed(data),
                       fetch_list=grad_clip_list)
    global_norm = 0
    for v in out[1:]:
        global_norm += np.sum(np.power(v, 2))
    global_norm = np.sqrt(global_norm)

    global_norm_clip = 0
    for v in out_clip[1:]:
        global_norm_clip += np.sum(np.power(v, 2))
    global_norm_clip = np.sqrt(global_norm_clip)

    if not np.isclose(
            a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3):
        exit(1)
exit(0)
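The hunk above deletes the old standalone script; its logic reappears in the TestGradientClip unit test below. The check it performs rests on the global-norm clipping rule: with global norm g = sqrt(sum of squared gradient norms) and threshold c, GradientClipByGlobalNorm scales every gradient by c / max(g, c), so the clipped global norm is min(g, c) up to numerical tolerance. A small NumPy sketch of that rule (the math only, not the fluid implementation):

import numpy as np

def clip_by_global_norm(grads, clip_norm):
    # Scale every gradient by the same factor so that their joint L2
    # norm never exceeds clip_norm.
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)
    return [g * scale for g in grads]

grads = [np.full((3,), 2.0), np.full((4,), -1.5)]  # joint norm sqrt(21) > 1
clipped = clip_by_global_norm(grads, clip_norm=1.0)
new_norm = np.sqrt(sum(np.sum(np.square(g)) for g in clipped))
assert np.isclose(new_norm, 1.0)  # min(sqrt(21), 1.0)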
@ -0,0 +1,65 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid.core as core
import numpy as np
from paddle.fluid.op import Operator


class TestGetTensorFromSelectedRows(unittest.TestCase):
    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_with_place(self, place):
        scope = core.Scope()
        x_rows = [0, 5, 5, 4, 20]
        height = 20
        row_numel = 2

        np_array = np.ones((len(x_rows), row_numel)).astype("float32")
        np_array[1, :] = 2.0
        np_array[2, :] = 3.0
        np_array[3, :] = 4.0

        # initialize input variable X
        x = scope.var('X').get_selected_rows()
        x.set_rows(x_rows)
        x.set_height(height)
        x_tensor = x.get_tensor()
        x_tensor.set(np_array, place)

        # initialize output variable Out
        out = scope.var("Out").get_tensor()

        op = Operator("get_tensor_from_selected_rows", X="X", Out="Out")

        op.run(scope, place)

        out_array = np.array(out)
        self.assertEqual((5, 2), out_array.shape)
        assert (out_array == np_array).all()

    def test_check_output(self):
        for place in self.get_places():
            self.check_with_place(place)


if __name__ == "__main__":
    unittest.main()
@ -0,0 +1,162 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid

BATCH_SIZE = 128
CLIP = 1


def bow_net(data,
            label,
            dict_dim,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=True, size=[dict_dim, emb_dim])
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    return avg_cost


class TestGradientClip(unittest.TestCase):
    def setUp(self):
        self.word_dict = paddle.dataset.imdb.word_dict()
        self.BATCH_SIZE = 2
        self.train_data = paddle.batch(
            paddle.dataset.imdb.train(self.word_dict),
            batch_size=self.BATCH_SIZE)

    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_operators(self, place):
        prog = fluid.framework.Program()
        startup_program = fluid.framework.Program()
        with fluid.program_guard(
                main_program=prog, startup_program=startup_program):
            image = fluid.layers.data(name='x', shape=[784], dtype='float32')
            label = fluid.layers.data(name='y', shape=[1], dtype='int64')

            hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
            hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
            predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')

            cost = fluid.layers.cross_entropy(input=predict, label=label)
            avg_cost = fluid.layers.mean(cost)

        prog_clip = prog.clone()

        avg_cost_clip = prog_clip.block(0).var(avg_cost.name)

        p_g = fluid.backward.append_backward(loss=avg_cost)
        p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)

        with fluid.program_guard(main_program=prog_clip):
            fluid.clip.set_gradient_clip(
                fluid.clip.GradientClipByGlobalNorm(clip_norm=CLIP))
            p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)

        grad_list = [elem[1] for elem in p_g]
        grad_clip_list = [elem[1] for elem in p_g_clip]

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=8192),
            batch_size=BATCH_SIZE)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
        exe.run(startup_program)

        count = 0
        for data in train_reader():
            count += 1
            if count > 5:
                break
            out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list)
            out_clip = exe.run(prog_clip,
                               feed=feeder.feed(data),
                               fetch_list=grad_clip_list)
            global_norm = 0
            for v in out[1:]:
                global_norm += np.sum(np.power(v, 2))
            global_norm = np.sqrt(global_norm)

            global_norm_clip = 0
            for v in out_clip[1:]:
                global_norm_clip += np.sum(np.power(v, 2))
            global_norm_clip = np.sqrt(global_norm_clip)

            assert np.isclose(
                a=global_norm_clip, b=np.minimum(global_norm, CLIP), rtol=5e-3)

    def check_sparse_gradient_clip(self, place):
        prog = fluid.framework.Program()
        startup_program = fluid.framework.Program()
        with fluid.program_guard(
                main_program=prog, startup_program=startup_program):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            cost = bow_net(data, label, len(self.word_dict))

            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))

            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01)
            sgd_optimizer.minimize(cost)

        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
        exe.run(startup_program)

        data = next(self.train_data())
        val = exe.run(prog, feed=feeder.feed(data), fetch_list=[cost])[0]
        self.assertEqual((1, ), val.shape)
        print(val)
        self.assertFalse(np.isnan(val))

    def test_operators(self):
        self.check_operators(core.CPUPlace())

    def test_sparse_gradient_clip(self):
        for place in self.get_places():
            self.check_sparse_gradient_clip(place)


if __name__ == '__main__':
    unittest.main()
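check_sparse_gradient_clip is the coverage this commit adds: with is_sparse=True, the embedding gradient arrives as a SelectedRows rather than a dense tensor, and the two new operators let the clip pass handle it, merging duplicated rows first and then materializing a dense tensor whose squared sum can enter the global norm. A hedged NumPy sketch of that pipeline (the function name is illustrative; the exact wiring inside clip.py is not shown in this diff):

import numpy as np

def sparse_grad_square_norm(rows, value):
    # merge_selected_rows: sum up duplicated rows.
    merged_rows = sorted(set(rows))
    merged = np.zeros((len(merged_rows), value.shape[1]), value.dtype)
    for r, v in zip(rows, value):
        merged[merged_rows.index(r)] += v
    # get_tensor_from_selected_rows: expose the dense payload, then
    # contribute its squared sum to the global norm.
    return np.sum(np.square(merged))

print(sparse_grad_square_norm([0, 5, 5], np.ones((3, 2), np.float32)))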
@ -0,0 +1,73 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle.fluid.core as core
import numpy as np
from paddle.fluid.op import Operator


class TestMergeSelectedRows(unittest.TestCase):
    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    def check_with_place(self, place):
        scope = core.Scope()
        x_rows = [0, 5, 5, 4, 20]
        out_rows = [0, 4, 5, 20]
        height = 20
        row_numel = 2

        np_array = np.ones((len(x_rows), row_numel)).astype("float32")
        np_array[1, :] = 2.0
        np_array[2, :] = 3.0
        np_array[3, :] = 4.0

        # initialize input variable X
        x = scope.var('X').get_selected_rows()
        x.set_rows(x_rows)
        x.set_height(height)
        x_tensor = x.get_tensor()
        x_tensor.set(np_array, place)

        # initialize output variable Out
        out = scope.var("Out").get_selected_rows()

        op = Operator("merge_selected_rows", X="X", Out="Out")

        op.run(scope, place)

        self.assertEqual(out.rows(), out_rows)
        self.assertEqual(out.height(), height)

        out_array = np.array(out.get_tensor())
        self.assertEqual((4, 2), out_array.shape)

        # the two entries for row 5 are merged: 2.0 + 3.0 = 5.0
        assert (out_array[0, :] == 1.0).all()
        assert (out_array[1, :] == 4.0).all()
        assert (out_array[2, :] == 5.0).all()
        assert (out_array[3, :] == 1.0).all()

    def test_check_output(self):
        for place in self.get_places():
            self.check_with_place(place)


if __name__ == "__main__":
    unittest.main()