revert-15207-remove_op_handle_lock_and_fix_var
commit f702f8fd10
@@ -0,0 +1,130 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
#define PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_

#include <memory>
#include <string>
#include <vector>

#include <boost/algorithm/string/predicate.hpp>

#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"

namespace paddle {
namespace framework {
namespace ir {

class Node;

/*
 * Remove the sum op that accumulates all gradients produced by the backward
 * ops, and remove the optimizer's dependencies on that sum op.
 *
 * Before this pass:
 *
 *   forward_op1   forward_op2
 *        |             |
 *     grad_op1      grad_op2
 *          \          /
 *           \        /
 *             sum_op
 *               |
 *             sgd_op
 *
 * After this pass:
 *
 *   forward_op1   forward_op2
 *        |             |
 *     grad_op1      grad_op2
 *        |             |
 *     sgd_op1       sgd_op2
 *
 * sgd_op1 and sgd_op2 update the same weight, which lives in the same
 * memory, so we can benefit from the lock-free acceleration.
 */
class LockFreeOptimizePass : public Pass {
 public:
  virtual ~LockFreeOptimizePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;

 private:
  // Create a new sgd node via the current optimizer node
  ir::Node* CreateNewSGDNode(ir::Graph* graph, ir::Node* forward_node,
                             ir::Node* backward_node, ir::Node* grad_sum_node,
                             ir::Node* optimize_node) const;

  // Replace the input weight's optimizers
  void ReplaceUpstreamNode(ir::Node* upstream_node,
                           ir::Node* old_optimizer_node,
                           ir::Node* new_optimizer_node) const;

  // Replace the output weight's optimizers
  void ReplaceAllDownstreamNode(ir::Node* old_optimizer_node,
                                ir::Node* new_optimizer_node) const;

  // Find all weight variables in the graph
  bool FindAllWeightVars(ir::Graph* graph) const;

  // Find the forward_op node via the backward_op node
  ir::Node* FindForwardOpViaBackwardOp(ir::Graph* graph,
                                       ir::Node* backward_node) const;

  std::vector<ir::Node*> FindConnectedNode(ir::Node* upstream_node,
                                           ir::Node* downstream_node) const;

  inline bool IsOpNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kOperation && node->Name() == name;
  }

  inline bool IsVarNamed(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable && node->Name() == name;
  }

  inline bool IsVarNameEndsWith(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           boost::algorithm::ends_with(node->Name(), name);
  }

  inline bool IsVarNameContains(ir::Node* node, const std::string& name) const {
    PADDLE_ENFORCE(node);

    return node->NodeType() == Node::Type::kVariable &&
           node->Name().find(name) != std::string::npos;
  }

  inline bool IsControlDepFrom(ir::Node* ctrl_dep_node, ir::Node* node) const {
    PADDLE_ENFORCE(ctrl_dep_node);
    PADDLE_ENFORCE(node);

    return IsControlDepVar(*ctrl_dep_node) &&
           ctrl_dep_node->inputs.size() >= 1u &&
           ctrl_dep_node->inputs[0] == node;
  }
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle

#endif  // PADDLE_FLUID_FRAMEWORK_IR_LOCK_FREE_OPTIMIZE_PASS_H_
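The header comment above describes the graph rewrite; the numerical reason it is safe is that SGD updates are additive, so applying each gradient to the shared weight separately gives the same result as summing the gradients first. Below is a minimal standalone sketch of that equivalence in plain C++ — it does not use Paddle's ir::Graph API, and the learning rate and vectors are made-up illustration values.

#include <cassert>
#include <cmath>
#include <vector>

// Apply w -= lr * g in place.
void SgdUpdate(std::vector<float>* w, const std::vector<float>& g, float lr) {
  for (size_t i = 0; i < w->size(); ++i) (*w)[i] -= lr * g[i];
}

int main() {
  const float lr = 0.1f;
  std::vector<float> g1 = {1.0f, 2.0f}, g2 = {3.0f, 4.0f};

  // Path 1: the original graph -- sum_op first, then a single sgd_op.
  std::vector<float> w_sum = {0.5f, 0.5f};
  std::vector<float> g_total = {g1[0] + g2[0], g1[1] + g2[1]};
  SgdUpdate(&w_sum, g_total, lr);

  // Path 2: after the pass -- sgd_op1 and sgd_op2 write the same weight.
  std::vector<float> w_split = {0.5f, 0.5f};
  SgdUpdate(&w_split, g1, lr);
  SgdUpdate(&w_split, g2, lr);

  // Both paths produce the same weight, which is why removing sum_op is valid.
  for (size_t i = 0; i < w_sum.size(); ++i)
    assert(std::fabs(w_sum[i] - w_split[i]) < 1e-6f);
  return 0;
}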
@@ -0,0 +1,194 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h"
#include "paddle/fluid/framework/var_type_inference.h"

namespace paddle {
namespace operators {

class FusedEmbeddingSeqPoolOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("W"),
                   "Input W of FusedEmbeddingSeqPoolOp should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Ids"),
                   "Input Ids of FusedEmbeddingSeqPoolOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output of FusedEmbeddingSeqPoolOp should not be null.");

    auto table_dims = ctx->GetInputDim("W");
    auto ids_dims = ctx->GetInputDim("Ids");
    const std::string& combiner = ctx->Attrs().Get<std::string>("combiner");

    PADDLE_ENFORCE_EQ(table_dims.size(), 2);
    PADDLE_ENFORCE_GE(ids_dims.size(), 1,
                      "The dim size of the 'Ids' tensor must be greater than "
                      "or equal to 1.");
    PADDLE_ENFORCE_EQ(ids_dims[ids_dims.size() - 1], 1,
                      "The last dimension of the 'Ids' tensor must be 1.");
    // we only support sum now
    PADDLE_ENFORCE_EQ(combiner, "sum");

    int64_t last_dim = table_dims[1];
    for (int i = 1; i != ids_dims.size(); ++i) {
      last_dim *= ids_dims[i];
    }

    if (ctx->IsRuntime()) {
      framework::Variable* ids_var =
          boost::get<framework::Variable*>(ctx->GetInputVarPtrs("Ids")[0]);
      const auto& ids_lod = ids_var->Get<LoDTensor>().lod();

      // at runtime, the LoD level of Ids must be 1
      PADDLE_ENFORCE_EQ(ids_lod.size(), 1u,
                        "The LoD level of Input(Ids) must be 1");
      PADDLE_ENFORCE_GE(ids_lod[0].size(), 1u, "The LoD could NOT be empty");

      int64_t batch_size = ids_lod[0].size() - 1;

      // at runtime, the shape from Ids -> output
      // should be [seq_length, 1] -> [batch_size, embedding_size]
      ctx->SetOutputDim("Out", framework::make_ddim({batch_size, last_dim}));
    } else {
      // at compile time, the LoD level of Ids must be 1
      framework::VarDesc* ids_desc =
          boost::get<framework::VarDesc*>(ctx->GetInputVarPtrs("Ids")[0]);
      PADDLE_ENFORCE_EQ(ids_desc->GetLoDLevel(), 1);

      // at compile time, the shape from Ids -> output
      // should be [-1, 1] -> [-1, embedding_size]
      ctx->SetOutputDim("Out", framework::make_ddim({-1, last_dim}));
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("W"));
    return framework::OpKernelType(data_type, ctx.device_context());
  }
};

class FusedEmbeddingSeqPoolOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("W",
             "(Tensor) The input represents embedding tensors, "
             "which is a learnable parameter.");
    AddInput("Ids",
             "An input with type int32 or int64 "
             "contains the ids to be looked up in W. "
             "The last dimension size must be 1.");
    AddOutput("Out", "The lookup results, which have the same type as W.");
    AddAttr<std::string>("combiner",
                         "(string, default sum) "
                         "A string specifying the reduction op. Currently only "
                         "sum is supported; it computes the weighted sum of "
                         "the embedding results for each row.")
        .SetDefault("sum");
    // NOTE(minqiyang): grad_inplace is a temporary attribute;
    // please do NOT set this attribute in Python layers.
    AddAttr<bool>("grad_inplace",
                  "(boolean, default false) "
                  "Whether the grad op reuses the input's variable.")
        .SetDefault(false);
    AddAttr<bool>("is_sparse",
                  "(boolean, default false) "
                  "Sparse update.")
        .SetDefault(false);
    AddComment(R"DOC(
FusedEmbeddingSeqPool Operator.

Computes embeddings for the given ids and weights.

This operator is used to perform lookups on the parameter W,
then it sums the lookup results within each sequence
and concatenates the pooled rows into a dense tensor.

The input Ids should carry the LoD (Level of Details) information;
the output LoD is not the same as that of the input Ids.

)DOC");
  }
};

class FusedEmbeddingSeqPoolOpGradDescMaker
    : public framework::DefaultGradOpDescMaker<true> {
  using ::paddle::framework::DefaultGradOpDescMaker<
      true>::DefaultGradOpDescMaker;

 protected:
  virtual std::string GradOpType() const {
    return "fused_embedding_seq_pool_grad";
  }
};

class FusedEmbeddingSeqPoolOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    auto table_dims = ctx->GetInputDim("W");
    ctx->SetOutputDim(framework::GradVarName("W"), table_dims);
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("W"));
    return framework::OpKernelType(data_type, ctx.device_context());
  }
};

class FusedEmbeddingSeqPoolOpGradVarTypeInference
    : public framework::VarTypeInference {
 public:
  void operator()(const framework::OpDesc& op_desc,
                  framework::BlockDesc* block) const override {
    auto out_var_name = op_desc.Output(framework::GradVarName("W")).front();
    auto attr = op_desc.GetAttr("is_sparse");
    bool is_sparse = boost::get<bool>(attr);
    if (is_sparse) {
      VLOG(3) << "fused_embedding_seq_pool_grad op "
              << framework::GradVarName("W") << " is set to SelectedRows";
      block->Var(out_var_name)
          ->SetType(framework::proto::VarType::SELECTED_ROWS);
    } else {
      VLOG(3) << "fused_embedding_seq_pool_grad op "
              << framework::GradVarName("W") << " is set to LoDTensor";
      block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
    }
    block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OPERATOR(fused_embedding_seq_pool, ops::FusedEmbeddingSeqPoolOp,
                  ops::FusedEmbeddingSeqPoolOpGradDescMaker,
                  ops::FusedEmbeddingSeqPoolOpMaker);
REGISTER_OPERATOR(fused_embedding_seq_pool_grad,
                  ops::FusedEmbeddingSeqPoolOpGrad,
                  ops::FusedEmbeddingSeqPoolOpGradVarTypeInference);

REGISTER_OP_CPU_KERNEL(fused_embedding_seq_pool,
                       ops::FusedEmbeddingSeqPoolKernel<float>,
                       ops::FusedEmbeddingSeqPoolKernel<double>);
REGISTER_OP_CPU_KERNEL(fused_embedding_seq_pool_grad,
                       ops::FusedEmbeddingSeqPoolGradKernel<float>,
                       ops::FusedEmbeddingSeqPoolGradKernel<double>);
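The runtime branch of InferShape above derives the batch size from the level-1 LoD offsets of Ids: with N sequences the offsets vector has N + 1 entries, so the output shape is [offsets.size() - 1, embedding_size]. A small standalone illustration of that mapping in plain C++ (made-up offsets and an assumed embedding width of 4; not tied to Paddle's InferShapeContext):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Level-1 LoD offsets for 3 sequences of lengths 2, 3 and 1:
  // ids rows [0, 2), [2, 5) and [5, 6).
  std::vector<std::size_t> lod = {0, 2, 5, 6};
  const int64_t embedding_size = 4;  // assumed table width, i.e. table_dims[1]

  int64_t batch_size = static_cast<int64_t>(lod.size()) - 1;
  // Ids of shape [seq_length, 1] = [6, 1] pool down to [batch_size, embedding_size].
  std::cout << "Out shape: [" << batch_size << ", " << embedding_size << "]\n";
  return 0;
}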
@@ -0,0 +1,142 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <cstring>
#include <string>
#include <vector>

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/math/blas.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
using SelectedRows = framework::SelectedRows;
using DDim = framework::DDim;

template <typename T>
struct EmbeddingVSumFunctor {
  void operator()(const framework::ExecutionContext &context,
                  const LoDTensor *table_t, const LoDTensor *ids_t,
                  LoDTensor *output_t) {
    auto *table = table_t->data<T>();
    int64_t row_number = table_t->dims()[0];
    int64_t row_width = table_t->dims()[1];
    int64_t last_dim = output_t->dims()[1];
    const int64_t *ids = ids_t->data<int64_t>();
    auto ids_lod = ids_t->lod()[0];
    int64_t ids_count = ids_t->numel() / ids_lod.back();

    auto *output = output_t->mutable_data<T>(context.GetPlace());

    auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);
    for (int64_t i = 0; i != ids_lod.size() - 1; ++i) {
      size_t begin = ids_lod[i] * ids_count;
      for (int64_t j = 0; j != ids_count; ++j) {
        // bounds-check the row that is actually copied below
        PADDLE_ENFORCE_LT(ids[begin + j], row_number);
        PADDLE_ENFORCE_GE(ids[begin + j], 0, "ids %d", i);
        blas.VCOPY(row_width, table + ids[begin + j] * row_width,
                   output + i * last_dim + j * row_width);
      }

      for (int64_t r = (ids_lod[i] + 1) * ids_count;
           r < ids_lod[i + 1] * ids_count; ++r) {
        PADDLE_ENFORCE_LT(ids[r], row_number);
        PADDLE_ENFORCE_GE(ids[r], 0, "ids %d", i);
        blas.AXPY(row_width, 1., table + ids[r] * row_width,
                  output + i * last_dim + (r % ids_count) * row_width);
      }
    }
  }
};

template <typename T>
class FusedEmbeddingSeqPoolKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    const LoDTensor *ids_t = context.Input<LoDTensor>("Ids");  // int tensor
    LoDTensor *output_t = context.Output<LoDTensor>("Out");    // float tensor
    const LoDTensor *table_var = context.Input<LoDTensor>("W");
    const std::string &combiner_type = context.Attr<std::string>("combiner");

    if (combiner_type == "sum") {
      EmbeddingVSumFunctor<T> functor;
      functor(context, table_var, ids_t, output_t);
    }
  }
};

template <typename T>
class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &context) const override {
    auto *table_var = context.InputVar("W");
    DDim table_dim;
    if (table_var->IsType<LoDTensor>()) {
      table_dim = context.Input<LoDTensor>("W")->dims();
    } else if (table_var->IsType<SelectedRows>()) {
      auto *table_t = context.Input<SelectedRows>("W");
      table_dim = table_t->value().dims();
    } else {
      PADDLE_THROW(
          "The parameter W of a LookupTable "
          "must be either LoDTensor or SelectedRows");
    }

    bool is_sparse = context.Attr<bool>("is_sparse");
    // Since paddings are not trainable and fixed in forward, the gradient of
    // paddings makes no sense and we don't deal with it in backward.
    if (is_sparse) {
      auto *ids = context.Input<LoDTensor>("Ids");
      auto *d_output = context.Input<LoDTensor>(framework::GradVarName("Out"));
      auto *d_table = context.Output<SelectedRows>(framework::GradVarName("W"));

      auto *ids_data = ids->data<int64_t>();
      int64_t ids_num = ids->numel();
      auto lod = ids->lod()[0];
      int64_t row_width = d_output->dims()[1];

      framework::Vector<int64_t> *new_rows = d_table->mutable_rows();
      new_rows->resize(ids_num);
      std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t));

      auto *d_table_value = d_table->mutable_value();
      d_table_value->Resize({ids_num, table_dim[1]});
      T *d_table_data = d_table_value->mutable_data<T>(context.GetPlace());
      const T *d_output_data = d_output->data<T>();

      auto blas = math::GetBlas<platform::CPUDeviceContext, T>(context);
      for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
        int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
        int64_t in_offset = lod[i] * row_width;
        const T *out_pos = d_output_data + i * row_width;
        T *in_pos = d_table_data + in_offset;
        for (int r = 0; r != h; ++r) {
          blas.VCOPY(row_width, out_pos, in_pos + r * row_width);
        }
      }
    } else {
      LOG(ERROR) << "Dense is not supported in fused_embedding_seq_pool_op now";
    }
  }
};

}  // namespace operators
}  // namespace paddle
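EmbeddingVSumFunctor above does the pooling with BLAS VCOPY/AXPY driven by the LoD offsets. The same computation, written as a short standalone sketch in plain C++ (flat row-major table, hand-written loops instead of BLAS, made-up sizes), may make the indexing easier to follow:

#include <cstdint>
#include <iostream>
#include <vector>

// Sum-pool embedding rows per sequence: out[i] = sum of table[ids[k]]
// for k in [lod[i], lod[i + 1]).
std::vector<float> EmbeddingSumPool(const std::vector<float>& table,
                                    int64_t row_width,
                                    const std::vector<int64_t>& ids,
                                    const std::vector<std::size_t>& lod) {
  const std::size_t batch_size = lod.size() - 1;
  std::vector<float> out(batch_size * row_width, 0.0f);
  for (std::size_t i = 0; i < batch_size; ++i) {
    for (std::size_t r = lod[i]; r < lod[i + 1]; ++r) {
      const float* src = &table[ids[r] * row_width];  // embedding row
      float* dst = &out[i * row_width];               // pooled output row
      for (int64_t c = 0; c < row_width; ++c) dst[c] += src[c];
    }
  }
  return out;
}

int main() {
  // 4-row table of width 2: row k is {k, 10k}.
  std::vector<float> table = {0, 0, 1, 10, 2, 20, 3, 30};
  std::vector<int64_t> ids = {1, 2, 3};   // two sequences: {1, 2} and {3}
  std::vector<std::size_t> lod = {0, 2, 3};
  auto out = EmbeddingSumPool(table, 2, ids, lod);
  // Expected output rows: {3, 30} and {3, 30}.
  for (float v : out) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}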
@@ -0,0 +1,61 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_NGRAPH
#pragma once

#include <memory>
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"

namespace paddle {
namespace operators {
namespace ngraphs {

// Apply the binary op T to a compile-time scalar and a node by broadcasting
// the scalar to the node's shape.
template <typename T>
std::shared_ptr<ngraph::Node> ElementwiseScalar(
    float scale, std::shared_ptr<ngraph::Node> node) {
  auto node_shape = node->get_shape();
  auto scale_const = ngraph::op::Constant::create(node->get_element_type(),
                                                  node_shape, {scale});
  return std::make_shared<T>(scale_const, node);
}

// Apply the binary op T to a 1-D, single-element scale node and a node by
// broadcasting the scale to the node's shape.
template <typename T>
std::shared_ptr<ngraph::Node> ElementwiseScalar(
    std::shared_ptr<ngraph::Node> scale_1d,
    std::shared_ptr<ngraph::Node> node) {
  auto scale_shape = scale_1d->get_shape();
  PADDLE_ENFORCE_EQ(scale_shape.size(), 1, "The scale node must be 1-D");
  PADDLE_ENFORCE_EQ(scale_shape.at(0), 1,
                    "The 1-D scale node must have shape {1}");

  auto node_shape = node->get_shape();
  ngraph::AxisSet axis_set;
  for (size_t i = 0; i < node_shape.size(); ++i) {
    axis_set.insert(i);
  }
  node_shape.push_back(1);

  auto scale_bcast =
      std::make_shared<ngraph::op::Broadcast>(scale_1d, node_shape, axis_set);

  auto scale_reshape =
      paddle::platform::NgReshaper(scale_bcast, node->get_shape());

  return std::make_shared<T>(scale_reshape, node);
}
}  // namespace ngraphs
}  // namespace operators
}  // namespace paddle
#endif
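Both overloads above broadcast a single value across every element of the other operand before applying the binary op T. A standalone sketch of that broadcast-then-apply idea in plain C++ (no nGraph; std::multiplies and std::plus stand in for the template parameter T):

#include <functional>
#include <iostream>
#include <vector>

// Broadcast a scalar across `values` and combine elementwise, mirroring the
// role of ElementwiseScalar<T>(scale, node).
template <typename BinaryOp>
std::vector<float> ElementwiseScalar(float scale, std::vector<float> values,
                                     BinaryOp op = BinaryOp()) {
  for (float& v : values) v = op(scale, v);
  return values;
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f};
  auto scaled = ElementwiseScalar<std::multiplies<float>>(2.0f, x);   // {2, 4, 6}
  auto shifted = ElementwiseScalar<std::plus<float>>(2.0f, x);        // {3, 4, 5}
  for (float v : scaled) std::cout << v << " ";
  std::cout << "| ";
  for (float v : shifted) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}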
@@ -0,0 +1,68 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_NGRAPH
#pragma once

#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <unordered_map>

#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/platform/ngraph_helper.h"

namespace paddle {
namespace operators {
namespace ngraphs {

void BuildMeanNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto input = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  ngraph::AxisSet axes;
  for (size_t i = 0; i < input->get_shape().size(); ++i) {
    axes.insert(i);
  }

  auto mean = ngraph::builder::mean(input, axes);
  auto mean_1d = std::make_shared<ngraph::op::Reshape>(
      mean, ngraph::AxisVector{}, ngraph::Shape{1});
  paddle::platform::SetOutputNode(op, "Out", mean_1d, ngb_node_map);
}

void BuildMeanGradNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto og = paddle::platform::GetInputNode(op, "Out@GRAD", ngb_node_map);
  auto x_shape = x->get_shape();
  float x_size = std::accumulate(std::begin(x_shape), std::end(x_shape), 1.0f,
                                 std::multiplies<float>());
  auto node_const = ngraph::op::Constant::create(og->get_element_type(),
                                                 ngraph::Shape{1}, {x_size});
  // dX is dOut divided by the element count of X, broadcast to X's shape.
  auto node_div = std::make_shared<ngraph::op::Divide>(og, node_const);

  auto result = ElementwiseScalar<ngraph::op::Add>(
      node_div,
      ngraph::op::Constant::create(og->get_element_type(), x_shape, {0}));
  paddle::platform::SetOutputNode(op, "X@GRAD", result, ngb_node_map);
}
}  // namespace ngraphs
}  // namespace operators
}  // namespace paddle
#endif
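BuildMeanGradNode encodes the fact that for y = mean(x) every element has dy/dx_i = 1/numel(x), so the input gradient is the scalar output gradient divided by the element count and broadcast to x's shape. A standalone numeric sketch of the same rule in plain C++ (no nGraph, made-up values):

#include <iostream>
#include <vector>

// Gradient of y = mean(x) w.r.t. x: every element receives d_out / numel(x).
std::vector<float> MeanGrad(std::size_t numel, float d_out) {
  return std::vector<float>(numel, d_out / static_cast<float>(numel));
}

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f, 4.0f};  // mean(x) = 2.5
  float d_out = 1.0f;                               // upstream gradient
  auto dx = MeanGrad(x.size(), d_out);              // {0.25, 0.25, 0.25, 0.25}
  for (float v : dx) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}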
@@ -0,0 +1,41 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#ifdef PADDLE_WITH_NGRAPH
#pragma once

#include <memory>
#include <string>
#include <unordered_map>

#include "ngraph/ngraph.hpp"
#include "paddle/fluid/operators/ngraph/ops/elementwise_scalar_op.h"
#include "paddle/fluid/platform/ngraph_helper.h"

namespace paddle {
namespace operators {
namespace ngraphs {

// Build Out = scale * X using the scalar overload of ElementwiseScalar.
void BuildScaleNode(
    const std::shared_ptr<paddle::framework::OperatorBase>& op,
    std::shared_ptr<
        std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
        ngb_node_map) {
  auto op_attrs = paddle::framework::AttrReader(op->Attrs());
  float scale = op_attrs.Get<float>("scale");
  auto x = paddle::platform::GetInputNode(op, "X", ngb_node_map);
  auto out = ElementwiseScalar<ngraph::op::Multiply>(scale, x);
  paddle::platform::SetOutputNode(op, "Out", out, ngb_node_map);
}
}  // namespace ngraphs
}  // namespace operators
}  // namespace paddle
#endif
@@ -1,58 +0,0 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <mutex>  // NOLINT

#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/macros.h"

#if CUDA_VERSION < 9000
enum cublasMath_t { CUBLAS_DEFAULT_MATH = 0 };
#endif

namespace paddle {
namespace platform {

class CublasHandleHolder {
 public:
  CublasHandleHolder(cudaStream_t stream, cublasMath_t math_type) {
    PADDLE_ENFORCE(dynload::cublasCreate(&handle_));
    PADDLE_ENFORCE(dynload::cublasSetStream(handle_, stream));
#if CUDA_VERSION >= 9000
    if (math_type == CUBLAS_TENSOR_OP_MATH) {
      PADDLE_ENFORCE(
          dynload::cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH));
    }
#endif
  }

  ~CublasHandleHolder() { PADDLE_ENFORCE(dynload::cublasDestroy(handle_)); }

  template <typename Callback>
  inline void Call(Callback &&callback) const {
    std::lock_guard<std::mutex> guard(mtx_);
    callback(handle_);
  }

 private:
  DISABLE_COPY_AND_ASSIGN(CublasHandleHolder);

  cublasHandle_t handle_;
  mutable std::mutex mtx_;
};

}  // namespace platform
}  // namespace paddle
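The CublasHandleHolder in the header deleted by this commit serializes every use of the wrapped handle through a mutex-guarded callback. A minimal standalone sketch of that RAII-plus-callback pattern in plain C++ (a fake Handle type stands in for cublasHandle_t so the sketch compiles without CUDA):

#include <iostream>
#include <mutex>

// Stand-in for a library handle such as cublasHandle_t.
struct FakeHandle {
  int id = 42;
};

class HandleHolder {
 public:
  HandleHolder() = default;
  HandleHolder(const HandleHolder&) = delete;
  HandleHolder& operator=(const HandleHolder&) = delete;

  // Run the callback with exclusive access to the handle, mirroring
  // CublasHandleHolder::Call.
  template <typename Callback>
  void Call(Callback&& callback) const {
    std::lock_guard<std::mutex> guard(mtx_);
    callback(handle_);
  }

 private:
  FakeHandle handle_;
  mutable std::mutex mtx_;
};

int main() {
  HandleHolder holder;
  // All handle use goes through Call, so concurrent callers never touch the
  // handle at the same time.
  holder.Call([](const FakeHandle& h) { std::cout << "handle " << h.id << "\n"; });
  return 0;
}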