commit bbc818a5a1

@@ -0,0 +1,58 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

#define GET_NODE(id, pattern)                               \
  PADDLE_ENFORCE(subgraph.count(pattern.RetrieveNode(#id)), \
                 "pattern has no Node called %s", #id);     \
  auto* id = subgraph.at(pattern.RetrieveNode(#id));        \
  PADDLE_ENFORCE_NOT_NULL(id, "subgraph has no node %s", #id);

std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init("depthwise_conv_mkldnn_pass", graph.get());
  GraphPatternDetector gpd;

  auto* pattern = gpd.mutable_pattern();
  pattern->NewNode("depthwise_conv")
      ->assert_is_op("depthwise_conv2d")
      ->assert_op_attr("use_mkldnn", true);

  int found_depthwise_conv_mkldnn_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
    GET_NODE(depthwise_conv, (*pattern));
    depthwise_conv->Op()->SetType("conv2d");
    found_depthwise_conv_mkldnn_count++;
  };

  gpd(graph.get(), handler);
  AddStatis(found_depthwise_conv_mkldnn_count);
  return graph;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(depthwise_conv_mkldnn_pass,
              paddle::framework::ir::DepthwiseConvMKLDNNPass);
@@ -0,0 +1,34 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/ir/fuse_pass_base.h"

namespace paddle {
namespace framework {
namespace ir {

class DepthwiseConvMKLDNNPass : public FusePassBase {
 public:
  virtual ~DepthwiseConvMKLDNNPass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(
      std::unique_ptr<ir::Graph> graph) const override;
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
@@ -0,0 +1,123 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/depthwise_conv_mkldnn_pass.h"

#include <gtest/gtest.h>

namespace paddle {
namespace framework {
namespace ir {

void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
           const std::vector<std::string>& inputs,
           const std::vector<std::string>& outputs, bool use_mkldnn = false) {
  auto* op = prog->MutableBlock(0)->AppendOp();
  op->SetType(type);
  op->SetAttr("use_mkldnn", use_mkldnn);
  op->SetAttr("name", name);
  op->SetInput("Input", {inputs[0]});
  op->SetInput("Filter", {inputs[1]});
  op->SetInput("Bias", {inputs[2]});
  op->SetOutput("Out", outputs);
}

// (a, weights, bias)->depthwise conv mkldnn->b
// (b, weights2, bias2)->depthwise conv no mkldnn->c
// (c, weights3, bias3)->conv mkldnn->d
// (d, weights4, bias4)->conv no mkldnn->e
ProgramDesc BuildProgramDesc() {
  ProgramDesc prog;
  for (auto& v : std::vector<std::string>(
           {"a", "b", "c", "d", "e", "weights", "bias", "weights2", "bias2",
            "weights3", "bias3", "weights4", "bias4"})) {
    auto* var = prog.MutableBlock(0)->Var(v);
    var->SetType(proto::VarType::SELECTED_ROWS);
    if (v == "weights" || v == "bias" || v == "weights2" || v == "bias2" ||
        v == "weights3" || v == "bias3" || v == "weights4" || v == "bias4") {
      var->SetPersistable(true);
    }
  }

  // depthwise conv with MKL-DNN
  SetOp(&prog, "depthwise_conv2d", "conv1",
        std::vector<std::string>({"a", "weights", "bias"}),
        std::vector<std::string>({"b"}), true);
  // depthwise conv without MKL-DNN
  SetOp(&prog, "depthwise_conv2d", "conv2",
        std::vector<std::string>({"b", "weights2", "bias2"}),
        std::vector<std::string>({"c"}), false);
  // conv with MKL-DNN
  SetOp(&prog, "conv2d", "conv3",
        std::vector<std::string>({"c", "weights3", "bias3"}),
        std::vector<std::string>({"d"}), true);
  // conv without MKL-DNN
  SetOp(&prog, "conv2d", "conv4",
        std::vector<std::string>({"d", "weights4", "bias4"}),
        std::vector<std::string>({"e"}), false);

  return prog;
}

TEST(DepthwiseConvMKLDNNPass, basic) {
  auto prog = BuildProgramDesc();

  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));

  auto pass = PassRegistry::Instance().Get("depthwise_conv_mkldnn_pass");

  struct counters {
    int mkldnn_depthwise_conv_nodes;
    int other_depthwise_conv_nodes;
    int mkldnn_conv_nodes;
    int other_conv_nodes;
  };

  counters before{1, 1, 1, 1};

  graph = pass->Apply(std::move(graph));

  // initialize counters before loop
  counters after{0, 0, 0, 0};

  for (auto* node : graph->Nodes()) {
    if (node->IsOp()) {
      auto* op = node->Op();
      if (op->Type() == "conv2d") {
        if (boost::get<bool>(op->GetAttr("use_mkldnn")))
          after.mkldnn_conv_nodes++;
        else
          after.other_conv_nodes++;
      } else if (op->Type() == "depthwise_conv2d") {
        if (boost::get<bool>(op->GetAttr("use_mkldnn")))
          after.mkldnn_depthwise_conv_nodes++;
        else
          after.other_depthwise_conv_nodes++;
      }
    }
  }

  EXPECT_EQ(after.other_depthwise_conv_nodes,
            before.other_depthwise_conv_nodes);
  EXPECT_EQ(after.other_conv_nodes, before.other_conv_nodes);
  EXPECT_EQ(after.mkldnn_depthwise_conv_nodes,
            before.mkldnn_depthwise_conv_nodes - 1);
  EXPECT_EQ(after.mkldnn_conv_nodes, before.mkldnn_conv_nodes + 1);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

USE_PASS(depthwise_conv_mkldnn_pass);
@@ -0,0 +1,112 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/cudnn_helper.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using ScopedSpatialTransformerDescriptor =
    platform::ScopedSpatialTransformerDescriptor;

template <typename T>
class CUDNNAffineGridOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace.");
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    auto* theta = ctx.Input<Tensor>("Theta");
    auto* output = ctx.Output<Tensor>("Output");
    const T* theta_data = theta->data<T>();

    int n = theta->dims()[0];
    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
    Tensor h_sizes;
    int* h_size_data;
    if (size_attr.size() == 0) {
      auto* output_shape = ctx.Input<Tensor>("OutputShape");
      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
      h_size_data = h_sizes.data<int>();
    } else {
      h_size_data = h_sizes.mutable_data<int>({4}, platform::CPUPlace());
      h_size_data[0] = n;
      h_size_data[1] = size_attr[1];
      h_size_data[2] = size_attr[2];
      h_size_data[3] = size_attr[3];
    }

    T* output_data = output->mutable_data<T>(
        {n, h_size_data[2], h_size_data[3], 2}, ctx.GetPlace());
    ScopedSpatialTransformerDescriptor st_desc;
    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
        st_desc.descriptor<T>(4, h_size_data);

    PADDLE_ENFORCE(platform::dynload::cudnnSpatialTfGridGeneratorForward(
        handle, cudnn_st_desc, theta_data, output_data));
  }
};

template <typename T>
class CUDNNAffineGridGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
                   "It must use CUDAPlace.");
    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
    auto handle = dev_ctx.cudnn_handle();
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto theta_grad = ctx.Output<Tensor>(framework::GradVarName("Theta"));

    int n = output_grad->dims()[0];
    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
    Tensor h_sizes;
    int* h_size_data;
    if (size_attr.size() == 0) {
      auto* output_shape = ctx.Input<Tensor>("OutputShape");
      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
      h_size_data = h_sizes.data<int>();
    } else {
      h_size_data = h_sizes.mutable_data<int>({4}, platform::CPUPlace());
      h_size_data[0] = n;
      h_size_data[1] = size_attr[1];
      h_size_data[2] = size_attr[2];
      h_size_data[3] = size_attr[3];
    }

    ScopedSpatialTransformerDescriptor st_desc;
    cudnnSpatialTransformerDescriptor_t cudnn_st_desc =
        st_desc.descriptor<T>(4, h_size_data);

    const T* output_grad_data = output_grad->data<T>();
    T* theta_grad_data = theta_grad->mutable_data<T>(ctx.GetPlace());

    PADDLE_ENFORCE(platform::dynload::cudnnSpatialTfGridGeneratorBackward(
        handle, cudnn_st_desc, output_grad_data, theta_grad_data));
  }
};

}  // namespace operators
}  // namespace paddle

namespace plat = paddle::platform;
REGISTER_OP_KERNEL(affine_grid, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNAffineGridOpKernel<float>,
                   paddle::operators::CUDNNAffineGridOpKernel<double>);
REGISTER_OP_KERNEL(affine_grid_grad, CUDNN, plat::CUDAPlace,
                   paddle::operators::CUDNNAffineGridGradOpKernel<float>,
                   paddle::operators::CUDNNAffineGridGradOpKernel<double>);
@@ -0,0 +1,233 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/affine_grid_op.h"
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename T>
struct Linspace<paddle::platform::CPUDeviceContext, T> {
  framework::Tensor operator()(T start, T end, int count,
                               const framework::ExecutionContext& ctx) {
    Tensor numbers;
    T* number_data = numbers.mutable_data<T>({count}, platform::CPUPlace());
    T slice = (end - start) / (T)(count - 1);
    for (int i = 0; i < count; ++i) {
      number_data[i] = start + (T)i * slice;
    }
    return numbers;
  }
};

class AffineGridOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Theta"),
                   "Input(Theta) of AffineGridOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Output"),
                   "Output(Output) of AffineGridOp should not be null.");
    auto theta_dims = ctx->GetInputDim("Theta");
    PADDLE_ENFORCE(theta_dims.size() == 3,
                   "AffineGrid's Input(Theta) should be 3-D tensor.");

    auto output_shape = ctx->Attrs().Get<std::vector<int>>("output_shape");
    if (output_shape.size() == 0) {
      PADDLE_ENFORCE(ctx->HasInput("OutputShape"),
                     "Input(OutputShape) of AffineGridOp should not be null if "
                     "attr(output_shape) is not configured.");
      auto output_shape_dims = ctx->GetInputDim("OutputShape");
      PADDLE_ENFORCE(output_shape_dims.size() == 1,
                     "AffineGrid's Input(OutputShape) should be 1-D tensor.");
    } else {
      PADDLE_ENFORCE(output_shape.size() == 4,
                     "The size of attr(output_shape) should be 4.");
    }

    PADDLE_ENFORCE(theta_dims[1] == 2, "Input(theta) dims[1] should be 2.");
    PADDLE_ENFORCE(theta_dims[2] == 3, "Input(theta) dims[2] should be 3.");
    // N * H * W * 2
    ctx->SetOutputDim("Output",
                      framework::make_ddim({theta_dims[0], -1, -1, 2}));
    ctx->ShareLoD("Theta", "Output");
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    framework::LibraryType library{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
    if (platform::CanCUDNNBeUsed(ctx)) {
      library = framework::LibraryType::kCUDNN;
    }
#endif
    auto data_type = framework::ToDataType(ctx.Input<Tensor>("Theta")->type());
    return framework::OpKernelType(data_type, ctx.GetPlace(),
                                   framework::DataLayout::kAnyLayout, library);
  }
};

class AffineGridOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "Theta",
        "(Tensor) A batch of affine transform parameters with shape [N, 2, 3]. "
        "It is used to transform coordinate (x_0, y_0) to coordinate (x_1, "
        "y_1).");
    AddInput("OutputShape",
             "(Tensor) The shape of target image with format [N, C, H, W].")
        .AsDispensable();
    AddOutput("Output", "(Tensor) Output Tensor with shape [N, H, W, 2].");
    AddAttr<bool>(
        "use_cudnn",
        "(bool, default true) Only used in the cuDNN kernel; requires cuDNN "
        "to be installed.")
        .SetDefault(true);
    AddAttr<std::vector<int>>(
        "output_shape",
        "The target output image shape with format [N, C, H, W].")
        .SetDefault(std::vector<int>());

    AddComment(R"DOC(
    It generates a grid of (x,y) coordinates using the parameters of the
    affine transformation that correspond to a set of points where the input
    feature map should be sampled to produce the transformed output feature map.

    Given:
        Theta = [[[x_11, x_12, x_13]
                  [x_14, x_15, x_16]]
                 [[x_21, x_22, x_23]
                  [x_24, x_25, x_26]]]

        OutputShape = [2, 3, 5, 5]

    Step 1:

        Generate relative coordinates according to OutputShape.
        The values of relative coordinates are in the interval between -1 and 1.
        The shape of the relative coordinates is [2, H, W] as below:

        C = [[[-1.  -1.  -1.  -1.  -1. ]
              [-0.5 -0.5 -0.5 -0.5 -0.5]
              [ 0.   0.   0.   0.   0. ]
              [ 0.5  0.5  0.5  0.5  0.5]
              [ 1.   1.   1.   1.   1. ]]
             [[-1.  -0.5  0.   0.5  1. ]
              [-1.  -0.5  0.   0.5  1. ]
              [-1.  -0.5  0.   0.5  1. ]
              [-1.  -0.5  0.   0.5  1. ]
              [-1.  -0.5  0.   0.5  1. ]]]

        C[0] is the coordinates in height axis and C[1] is the coordinates in
        width axis.

    Step 2:

        Transpose and reshape C to shape [H * W, 2] and append ones to the last
        dimension. Then we get:

        C_ = [[-1.  -1.   1. ]
              [-0.5 -1.   1. ]
              [ 0.  -1.   1. ]
              [ 0.5 -1.   1. ]
              [ 1.  -1.   1. ]
              [-1.  -0.5  1. ]
              [-0.5 -0.5  1. ]
              [ 0.  -0.5  1. ]
              [ 0.5 -0.5  1. ]
              [ 1.  -0.5  1. ]
              [-1.   0.   1. ]
              [-0.5  0.   1. ]
              [ 0.   0.   1. ]
              [ 0.5  0.   1. ]
              [ 1.   0.   1. ]
              [-1.   0.5  1. ]
              [-0.5  0.5  1. ]
              [ 0.   0.5  1. ]
              [ 0.5  0.5  1. ]
              [ 1.   0.5  1. ]
              [-1.   1.   1. ]
              [-0.5  1.   1. ]
              [ 0.   1.   1. ]
              [ 0.5  1.   1. ]
              [ 1.   1.   1. ]]

    Step 3:

        Compute the output by the equation $$Output[i] = C_ * Theta[i]^T$$
    )DOC");
  }
};

class AffineGridOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;
  void InferShape(framework::InferShapeContext* ctx) const override {
    auto theta_dims = ctx->GetInputDim("Theta");
    if (ctx->HasOutput(framework::GradVarName("Theta"))) {
      ctx->SetOutputDim(framework::GradVarName("Theta"), theta_dims);
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    framework::LibraryType library_{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
    if (platform::CanCUDNNBeUsed(ctx)) {
      library_ = framework::LibraryType::kCUDNN;
    }
#endif
    return framework::OpKernelType(
        framework::ToDataType(ctx.Input<Tensor>("Theta")->type()),
        ctx.GetPlace(), framework::DataLayout::kAnyLayout, library_);
  }
};

class AffineGridGradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto* op = new framework::OpDesc();
    op->SetType("affine_grid_grad");
    op->SetInput("Theta", Input("Theta"));
    op->SetInput("OutputShape", Input("OutputShape"));
    op->SetInput(framework::GradVarName("Output"), OutputGrad("Output"));

    op->SetAttrMap(Attrs());

    op->SetOutput(framework::GradVarName("Theta"), InputGrad("Theta"));
    return std::unique_ptr<framework::OpDesc>(op);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(affine_grid, ops::AffineGridOp, ops::AffineGridOpMaker,
                  ops::AffineGridGradMaker);
REGISTER_OPERATOR(affine_grid_grad, ops::AffineGridOpGrad);

REGISTER_OP_CPU_KERNEL(
    affine_grid,
    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::AffineGridOpKernel<paddle::platform::CPUDeviceContext, double>);
REGISTER_OP_CPU_KERNEL(
    affine_grid_grad,
    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, float>,
    ops::AffineGridGradOpKernel<paddle::platform::CPUDeviceContext, double>);
@@ -0,0 +1,190 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/math/math_function.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
template <typename T, size_t D, int MajorType = Eigen::RowMajor,
          typename IndexType = Eigen::DenseIndex>
using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;

using Array1 = Eigen::DSizes<int64_t, 1>;
using Array2 = Eigen::DSizes<int64_t, 2>;
using Array3 = Eigen::DSizes<int64_t, 3>;
using Array4 = Eigen::DSizes<int64_t, 4>;

/**
 * Return a tensor with evenly spaced numbers over a specified interval.
 */
template <typename DeviceContext, typename T>
struct Linspace {
  framework::Tensor operator()(T start, T end, int count,
                               const framework::ExecutionContext& ctx);
};

template <typename DeviceContext, typename T>
class AffineGridOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    auto* theta = ctx.Input<Tensor>("Theta");
    int n = theta->dims()[0];

    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
    int h = 0;
    int w = 0;
    if (size_attr.size() == 0) {
      auto* output_shape = ctx.Input<Tensor>("OutputShape");
      Tensor h_sizes;
      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
      const int* h_size_data = h_sizes.data<int>();
      h = h_size_data[2];
      w = h_size_data[3];
    } else {
      h = size_attr[2];
      w = size_attr[3];
    }

    auto* output = ctx.Output<Tensor>("Output");
    output->mutable_data<T>({n, h, w, 2}, ctx.GetPlace());

    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), output,
        static_cast<T>(0));

    Linspace<DeviceContext, T> linspace;
    // Get height coordinates evenly spaced in [-1, 1], with shape [h]
    auto h_idx = linspace((T)-1, (T)1, h, ctx);
    auto h_idx_t = EigenTensor<T, 1>::From(h_idx);
    // Get width coordinates evenly spaced in [-1, 1], with shape [w]
    auto w_idx = linspace((T)-1, (T)1, w, ctx);
    auto w_idx_t = EigenTensor<T, 1>::From(w_idx);
    // Get constant ones tensor with shape [h, w, 1]
    Tensor ones;
    ones.mutable_data<T>({h, w, 1}, ctx.GetPlace());
    auto ones_t = EigenTensor<T, 3>::From(ones).setConstant((T)1);
    // Get grid tensor with shape [n, h, w, 3] by concatenating w_idx, h_idx
    // and ones
    Tensor grid;
    grid.mutable_data<T>({n, h, w, 3}, ctx.GetPlace());
    auto grid_t = EigenTensor<T, 4>::From(grid);

    grid_t.device(place) = w_idx_t.reshape(Array2(1, w))
                               .broadcast(Array2(h, 1))
                               .reshape(Array3(h, w, 1))
                               .concatenate(h_idx_t.reshape(Array2(1, h))
                                                .broadcast(Array2(w, 1))
                                                .shuffle(Array2(1, 0))
                                                .reshape(Array3(h, w, 1)),
                                            2)
                               .eval()
                               .concatenate(ones_t, 2)
                               .reshape(Array4(1, h, w, 3))
                               .broadcast(Array4(n, 1, 1, 1));

    // output = grid * theta.T
    // TODO(wanghaoshuang): Refine batched matrix multiply
    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    for (int i = 0; i < n; ++i) {
      Tensor sliced_grid = grid.Slice(i, i + 1).Resize({h * w, 3});
      Tensor sliced_theta = theta->Slice(i, i + 1).Resize({2, 3});
      Tensor sliced_out = output->Slice(i, i + 1).Resize({h * w, 2});
      blas.MatMul(sliced_grid, false, sliced_theta, true, T(1), &sliced_out,
                  T(0));
    }
  }
};

template <typename DeviceContext, typename T>
class AffineGridGradOpKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
    auto output_grad = ctx.Input<Tensor>(framework::GradVarName("Output"));
    auto theta_grad = ctx.Output<Tensor>(framework::GradVarName("Theta"));

    int n = output_grad->dims()[0];
    auto size_attr = ctx.Attr<std::vector<int>>("output_shape");
    int h = 0;
    int w = 0;
    if (size_attr.size() == 0) {
      auto* output_shape = ctx.Input<Tensor>("OutputShape");
      Tensor h_sizes;
      framework::TensorCopy(*output_shape, platform::CPUPlace(), &h_sizes);
      const int* h_size_data = h_sizes.data<int>();
      h = h_size_data[2];
      w = h_size_data[3];
    } else {
      h = size_attr[2];
      w = size_attr[3];
    }

    theta_grad->mutable_data<T>({n, 2, 3}, ctx.GetPlace());

    math::SetConstant<DeviceContext, T>()(
        ctx.template device_context<DeviceContext>(), theta_grad,
        static_cast<T>(0));

    Linspace<DeviceContext, T> linspace;

    // Get height coordinates evenly spaced in [-1, 1], with shape [h]
    auto h_idx = linspace((T)-1, (T)1, h, ctx);
    auto h_idx_t = EigenTensor<T, 1>::From(h_idx);
    // Get width coordinates evenly spaced in [-1, 1], with shape [w]
    auto w_idx = linspace((T)-1, (T)1, w, ctx);
    auto w_idx_t = EigenTensor<T, 1>::From(w_idx);
    // Get constant ones tensor with shape [h, w, 1]
    Tensor ones;
    ones.mutable_data<T>({h, w, 1}, ctx.GetPlace());
    auto ones_t = EigenTensor<T, 3>::From(ones).setConstant((T)1);
    // Get grid tensor with shape [n, h, w, 3] by concatenating w_idx, h_idx
    // and ones
    Tensor grid;
    grid.mutable_data<T>({n, h, w, 3}, ctx.GetPlace());
    auto grid_t = EigenTensor<T, 4>::From(grid);
    grid_t.device(place) = w_idx_t.reshape(Array2(1, w))
                               .broadcast(Array2(h, 1))
                               .reshape(Array3(h, w, 1))
                               .concatenate(h_idx_t.reshape(Array2(1, h))
                                                .broadcast(Array2(w, 1))
                                                .shuffle(Array2(1, 0))
                                                .reshape(Array3(h, w, 1)),
                                            2)
                               .eval()
                               .concatenate(ones_t, 2)
                               .reshape(Array4(1, h, w, 3))
                               .broadcast(Array4(n, 1, 1, 1));
    // theta_grad = output_grad.T * grid
    // TODO(wanghaoshuang): Refine batched matrix multiply
    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    for (int i = 0; i < n; ++i) {
      Tensor sliced_grid = grid.Slice(i, i + 1).Resize({h * w, 3});
      Tensor sliced_out_grad = output_grad->Slice(i, i + 1).Resize({h * w, 2});
      Tensor sliced_theta_grad = theta_grad->Slice(i, i + 1).Resize({2, 3});
      blas.MatMul(sliced_out_grad, true, sliced_grid, false, T(1),
                  &sliced_theta_grad, T(0));
    }
  }
};

}  // namespace operators
}  // namespace paddle
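
For intuition, here is a minimal NumPy sketch of the [n, h, w, 3] base grid that the Eigen expression in AffineGridOpKernel builds: width coordinates first, then height coordinates, then a column of ones, broadcast over the batch. This is a hypothetical illustration, not code from this commit; the function name build_base_grid is invented.

import numpy as np

def build_base_grid(n, h, w):
    # w_idx varies along the width axis, h_idx along the height axis; both
    # span [-1, 1], matching the two Linspace calls in the kernel above.
    w_idx = np.broadcast_to(np.linspace(-1, 1, w)[None, :], (h, w))
    h_idx = np.broadcast_to(np.linspace(-1, 1, h)[:, None], (h, w))
    ones = np.ones((h, w))
    grid = np.stack([w_idx, h_idx, ones], axis=2)  # [h, w, 3]
    return np.broadcast_to(grid, (n, h, w, 3))     # [n, h, w, 3]

Each output point is then grid[i] multiplied by theta[i]^T, which is exactly what the per-batch blas.MatMul loop above computes.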
@@ -0,0 +1,174 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import sys
import signal
import subprocess
import argparse
import time
import math
import random
from multiprocessing import Process
from functools import reduce

import numpy as np
import unittest
import six

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid import io

from test_dist_base import TestDistRunnerBase, runtime_main, RUN_STEP
from dist_simnet_bow import TestDistSimnetBow2x2, DATA_URL, DATA_MD5


class TestDistSaveLoad2x2(TestDistSimnetBow2x2):
    def _load_persistable_vars(self, executor, dirname, program):
        def _is_checkpoint_var(var):
            """
            Checkpoints do not save or load every variable: variables whose
            type is FEED_MINIBATCH/FETCH_LIST/RAW, or whose name contains
            @GRAD, are discarded.

            :param var: Variable
            """
            if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
                    var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
                    var.desc.type() == core.VarDesc.VarType.RAW:
                return False
            # @GRAD marks gradient variables; checkpoints do not save them.
            if "@GRAD" in var.name:
                return False
            # .trainer_ marks distributed-training variables; checkpoints do
            # not save them.
            if ".trainer_" in var.name:
                return False

            # .block marks distributed-training variables; checkpoints do not
            # save them.
            if ".block" in var.name:
                return False

            if "tmp_" in var.name:
                return False

            return var.persistable

        io.load_vars(
            executor,
            dirname=dirname,
            main_program=program,
            predicate=_is_checkpoint_var,
            filename=None)

    def run_pserver(self, args):
        self.get_model(batch_size=2)
        # NOTE: pserver should not call memory optimize
        t = self.get_transpiler(args.trainer_id,
                                fluid.default_main_program(), args.endpoints,
                                args.trainers, args.sync_mode)
        pserver_prog = t.get_pserver_program(args.current_endpoint)
        startup_prog = t.get_startup_program(args.current_endpoint,
                                             pserver_prog)

        need_load = bool(int(os.getenv("LOAD", "0")))
        model_dir = os.getenv("MODEL_DIR", "")

        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)

        if need_load and model_dir:
            self._load_persistable_vars(exe, model_dir, startup_prog)
        exe.run(pserver_prog)

    def run_trainer(self, args):
        test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
            self.get_model(batch_size=2)

        if args.mem_opt:
            fluid.memory_optimize(fluid.default_main_program(), skip_grads=True)
        if args.is_dist:
            t = self.get_transpiler(args.trainer_id,
                                    fluid.default_main_program(),
                                    args.endpoints, args.trainers,
                                    args.sync_mode)

            trainer_prog = t.get_trainer_program()
        else:
            trainer_prog = fluid.default_main_program()

        if args.use_cuda:
            place = fluid.CUDAPlace(0)
        else:
            place = fluid.CPUPlace()

        startup_exe = fluid.Executor(place)
        startup_exe.run(fluid.default_startup_program())

        strategy = fluid.ExecutionStrategy()
        strategy.num_threads = 1
        strategy.allow_op_delay = False

        build_stra = fluid.BuildStrategy()

        if args.use_reduce:
            build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
        else:
            build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce

        exe = fluid.ParallelExecutor(
            args.use_cuda,
            loss_name=avg_cost.name,
            exec_strategy=strategy,
            build_strategy=build_stra)

        feed_var_list = [
            var for var in trainer_prog.global_block().vars.values()
            if var.is_data
        ]

        feeder = fluid.DataFeeder(feed_var_list, place)
        reader_generator = train_reader()

        def get_data():
            origin_batch = next(reader_generator)
            if args.is_dist and args.use_reader_alloc:
                new_batch = []
                for offset, item in enumerate(origin_batch):
                    if offset % 2 == args.trainer_id:
                        new_batch.append(item)
                return new_batch
            else:
                return origin_batch

        need_save = bool(int(os.getenv("SAVE", "0")))
        model_dir = os.getenv("MODEL_DIR", "")

        if need_save:
            for _ in six.moves.xrange(RUN_STEP):
                loss, = exe.run(fetch_list=[avg_cost.name],
                                feed=feeder.feed(get_data()))
            if need_save and model_dir:
                io.save_persistables(startup_exe, model_dir, trainer_prog)

        var = np.array(fluid.global_scope().find_var('__fc_b__').get_tensor())
        print(np.ravel(var).tolist())


if __name__ == "__main__":
    paddle.dataset.common.download(DATA_URL, 'simnet', DATA_MD5, "train")
    runtime_main(TestDistSaveLoad2x2)
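
The save/load handshake in this runner is driven entirely by the environment variables read via os.getenv above. A hypothetical sketch of how a harness would set them (the directory path is a placeholder):

import os

# The trainer saves persistables when SAVE=1; the pserver reloads them when
# LOAD=1. Both sides share MODEL_DIR.
os.environ["MODEL_DIR"] = "/tmp/dist_save_load"  # hypothetical directory
os.environ["SAVE"] = "1"   # set for the trainer run that saves the model
os.environ["LOAD"] = "1"   # set for the pserver run that restores it

test_dist_save_load.py below wires these up the same way through its local_env and cluster_env dictionaries.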
@@ -0,0 +1,79 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np
from op_test import OpTest


def AffineGrid(theta, size):
    n = size[0]
    w = size[3]
    h = size[2]
    h_idx = np.repeat(
        np.linspace(-1, 1, h)[np.newaxis, :], w, axis=0).T[:, :, np.newaxis]
    w_idx = np.repeat(
        np.linspace(-1, 1, w)[np.newaxis, :], h, axis=0)[:, :, np.newaxis]
    grid = np.concatenate(
        [w_idx, h_idx, np.ones([h, w, 1])], axis=2)  # h * w * 3
    grid = np.repeat(grid[np.newaxis, :], size[0], axis=0)  # n * h * w * 3

    ret = np.zeros([n, h * w, 2])
    theta = theta.transpose([0, 2, 1])
    for i in range(len(theta)):
        ret[i] = np.dot(grid[i].reshape([h * w, 3]), theta[i])

    return ret.reshape([n, h, w, 2]).astype("float32")
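
As a quick sanity check of this reference implementation, a hypothetical example (not part of the test): an identity theta should reproduce the normalized coordinates themselves.

# theta = [[1, 0, 0], [0, 1, 0]] maps (x, y, 1) back to (x, y).
theta = np.array([[[1., 0., 0.], [0., 1., 0.]]], dtype="float32")  # [1, 2, 3]
grid = AffineGrid(theta, [1, 1, 2, 2])  # n=1, c=1, h=2, w=2
# grid[0] == [[[-1., -1.], [ 1., -1.]],
#             [[-1.,  1.], [ 1.,  1.]]]  -- (x, y) per output pixel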

class TestAffineGridOp(OpTest):
    def setUp(self):
        self.initTestCase()
        self.op_type = "affine_grid"
        theta = np.random.randint(1, 3, self.theta_shape).astype("float32")
        theta = np.ones(self.theta_shape).astype("float32")
        self.inputs = {'Theta': theta}
        self.attrs = {"use_cudnn": True}
        if self.dynamic_shape:
            self.inputs['OutputShape'] = self.output_shape
        else:
            self.attrs['output_shape'] = self.output_shape
        self.outputs = {'Output': AffineGrid(theta, self.output_shape)}

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(
            ['Theta'],
            'Output',
            no_grad_set=['OutputShape'],
            max_relative_error=0.006)

    def initTestCase(self):
        self.theta_shape = (3, 2, 3)
        self.output_shape = np.array([3, 2, 5, 7]).astype("int32")
        self.dynamic_shape = False


class TestAffineGridOpCase1(TestAffineGridOp):
    def initTestCase(self):
        self.theta_shape = (3, 2, 3)
        self.output_shape = np.array([3, 2, 5, 7]).astype("int32")
        self.dynamic_shape = True


if __name__ == '__main__':
    unittest.main()
@@ -0,0 +1,90 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import shutil
import unittest
import tempfile

import numpy as np

from test_dist_base import TestDistBase, RUN_STEP


class TestDistSaveLoadDense2x2(TestDistBase):
    def _setup_config(self):
        self._sync_mode = True
        self._enforce_place = "CPU"

    def check_with_place(self,
                         model_file,
                         delta=1e-3,
                         check_error_log=False,
                         need_envs={}):

        required_envs = {
            "PATH": os.getenv("PATH", ""),
            "PYTHONPATH": os.getenv("PYTHONPATH", ""),
            "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
            "http_proxy": ""
        }

        required_envs.update(need_envs)

        if check_error_log:
            required_envs["GLOG_v"] = "7"
            required_envs["GLOG_logtostderr"] = "1"

        model_dir = tempfile.mkdtemp()

        local_env = {}
        local_env["SAVE"] = "1"
        local_env["MODEL_DIR"] = model_dir
        local_env.update(required_envs)

        cluster_env = {}
        cluster_env["LOAD"] = "1"
        cluster_env["MODEL_DIR"] = model_dir
        cluster_env.update(required_envs)

        local_var = self._run_local(model_file, local_env, check_error_log)
        tr0_var, tr1_var = self._run_cluster(model_file, cluster_env,
                                             check_error_log)

        shutil.rmtree(model_dir)

        local_np = np.array(eval(local_var[0]))
        train0_np = np.array(eval(tr0_var[0]))
        train1_np = np.array(eval(tr1_var[0]))
        self.assertAlmostEqual(local_np.all(), train0_np.all(), delta=delta)
        self.assertAlmostEqual(local_np.all(), train1_np.all(), delta=delta)
        self.assertAlmostEqual(train0_np.all(), train1_np.all(), delta=delta)

    @unittest.skip(reason="CI fail")
    def test_dist(self):
        need_envs = {
            "IS_DISTRIBUTED": '0',
            "IS_SPARSE": '0',
            'IS_SELF_CONTAINED_LR': '1'
        }
        self.check_with_place(
            "dist_save_load.py",
            delta=0,
            check_error_log=False,
            need_envs=need_envs)


if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,130 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import numpy as np
from threading import Thread


def user_reader(inputs):
    def _reader():
        for d in inputs:
            yield d

    return _reader


def batch_feeder(batch_reader, pin_memory=False, img_dtype="float32"):
    def _feeder():
        for batch_data in batch_reader():
            sample_batch = []
            label_batch = []
            for sample, label in batch_data:
                sample_batch.append(sample)
                label_batch.append([label])
            tensor = core.LoDTensor()
            label = core.LoDTensor()
            place = core.CUDAPinnedPlace() if pin_memory else core.CPUPlace()
            tensor.set(np.array(sample_batch, dtype=img_dtype), place)
            label.set(np.array(label_batch, dtype="int64"), place)
            yield [tensor, label]

    return _feeder
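
A hypothetical standalone use of batch_feeder, assuming inputs is a list of (sample, label) pairs like the one built in setUp below; this mirrors how the test consumes the feeder:

feeder = batch_feeder(
    paddle.batch(user_reader(inputs), batch_size=2), pin_memory=False)
for tensor, label in feeder():
    # tensor: LoDTensor of float32 samples; label: LoDTensor of int64 labels
    pass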

class TestPyReader(unittest.TestCase):
    def setUp(self):
        self.capacity = 10
        self.shapes = [(-1, 3, 2, 1), (-1, 1)]
        self.lod_levels = [0, 0]
        self.dtypes = ['float32', 'int64']

    def test_pin_memory_pyreader(self):
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda() \
                else fluid.CPUPlace()
            executor = fluid.Executor(place)

            data_file = fluid.layers.py_reader(
                capacity=self.capacity,
                dtypes=self.dtypes,
                lod_levels=self.lod_levels,
                shapes=self.shapes)
            # feed_queue = data_file.queue
            read_out_data = fluid.layers.read_file(data_file)

            self.inputs = []
            for _ in range(10):
                sample = np.random.uniform(
                    low=0, high=1, size=[3, 2, 1]).astype("float32")
                label = np.random.uniform(
                    low=0, high=10, size=[1]).astype("int64")
                self.inputs.append((sample, label))

            self.input_tensors = []
            for d, l in batch_feeder(
                    paddle.batch(
                        user_reader(self.inputs), batch_size=2),
                    pin_memory=fluid.core.is_compiled_with_cuda())():
                ta = fluid.LoDTensorArray()
                ta.append(d)
                ta.append(l)
                self.input_tensors.append(ta)

            self.batched_inputs = []
            for batch in paddle.batch(user_reader(self.inputs), batch_size=2)():
                feed_d = []
                feed_l = []
                for d, l in batch:
                    feed_d.append(d)
                    feed_l.append([l])
                self.batched_inputs.append([feed_d, feed_l])

            data_file.decorate_tensor_provider(
                batch_feeder(
                    paddle.batch(
                        user_reader(self.inputs), batch_size=2),
                    pin_memory=fluid.core.is_compiled_with_cuda()))

            executor.run(fluid.default_startup_program())
            self.outputs = []

            data_file.start()
            for _ in self.input_tensors:
                self.outputs.append(
                    executor.run(fetch_list=list(read_out_data)))
            data_file.reset()
            self.validate()

    def validate(self):
        self.assertEqual(len(self.batched_inputs), len(self.outputs))
        for in_data_list, out_data_list in zip(self.batched_inputs,
                                               self.outputs):
            self.assertEqual(len(in_data_list), len(out_data_list))
            in_data_list_np = [
                np.array(in_lod_tensor) for in_lod_tensor in in_data_list
            ]
            for in_data, out_data in zip(in_data_list_np, out_data_list):
                self.assertTrue((in_data == out_data).all())


if __name__ == '__main__':
    unittest.main()