remove conflict

mobile_baidu
chengduoZH 7 years ago
commit 1431f25196

1
.gitignore vendored

@ -28,3 +28,4 @@ cmake_install.cmake
paddle/.timestamp paddle/.timestamp
python/paddlepaddle.egg-info/ python/paddlepaddle.egg-info/
paddle/pybind/pybind.h paddle/pybind/pybind.h
python/paddle/v2/framework/tests/tmp/*

@ -15,7 +15,7 @@ nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor)
cc_test(variable_test SRCS variable_test.cc) cc_test(variable_test SRCS variable_test.cc)
cc_library(scope SRCS scope.cc) cc_library(scope SRCS scope.cc DEPS glog)
cc_test(scope_test SRCS scope_test.cc DEPS scope) cc_test(scope_test SRCS scope_test.cc DEPS scope)
@ -24,9 +24,10 @@ cc_test(program_desc_test SRCS program_desc_test.cc DEPS proto_desc)
cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute) cc_library(op_proto_maker SRCS op_proto_maker.cc DEPS framework_proto attribute)
cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker) cc_test(op_proto_maker_test SRCS op_proto_maker_test.cc DEPS op_proto_maker)
cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto) cc_library(op_info SRCS op_info.cc DEPS attribute framework_proto)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog) cc_library(shape_inference SRCS shape_inference.cc DEPS ddim attribute)
cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog shape_inference)
cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry) cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute ddim op_info operator glog) cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog)
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc) cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry) cc_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)

@ -34,5 +34,25 @@ inline DataType ToDataType(std::type_index type) {
} }
} }
template <typename Visitor>
inline void VisitDataType(DataType type, Visitor visitor) {
switch (type) {
case DataType::FP32:
visitor.template operator()<float>();
break;
case DataType::FP64:
visitor.template operator()<double>();
break;
case DataType::INT32:
visitor.template operator()<int>();
break;
case DataType::INT64:
visitor.template operator()<int64_t>();
break;
default:
PADDLE_THROW("Not supported");
}
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -16,15 +16,51 @@ limitations under the License. */
#include <functional> #include <functional>
#include <mutex> #include <mutex>
#include <unordered_map> #include <unordered_map>
#include "glog/logging.h"
#include "paddle/framework/block_desc.h" #include "paddle/framework/block_desc.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/program_desc.h" #include "paddle/framework/program_desc.h"
#include "paddle/framework/shape_inference.h"
#include "glog/logging.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
class OpDescBind;
class BlockDescBind;
class CompileTimeInferShapeContext : public InferShapeContext {
public:
CompileTimeInferShapeContext(const OpDescBind &op,
const BlockDescBind &block);
bool HasInput(const std::string &name) const override;
bool HasOutput(const std::string &name) const override;
bool HasInputs(const std::string &name) const override;
bool HasOutputs(const std::string &name) const override;
DDim GetInputDim(const std::string &name) const override;
void SetOutputDim(const std::string &name, const DDim &dim) override;
AttrReader Attrs() const override;
const std::vector<std::string> &Inputs(
const std::string &name) const override;
const std::vector<std::string> &Outputs(
const std::string &name) const override;
private:
DDim GetDim(const std::string &name) const override;
void SetDim(const std::string &name, const DDim &dim) override;
const OpDescBind &op_;
const BlockDescBind &block_;
};
OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs, OpDescBind::OpDescBind(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const VariableNameMap &outputs,
const AttributeMap &attrs) { const AttributeMap &attrs) {
@ -288,5 +324,97 @@ void OpDescBind::InferVarType(BlockDescBind *block) const {
} }
} }
CompileTimeInferShapeContext::CompileTimeInferShapeContext(
const OpDescBind &op, const BlockDescBind &block)
: op_(op), block_(block) {}
bool CompileTimeInferShapeContext::HasInput(const std::string &name) const {
const std::vector<std::string> &input_names = op_.Input(name);
auto length = input_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Input(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVarRecursive(input_names[0]);
}
bool CompileTimeInferShapeContext::HasOutput(const std::string &name) const {
const std::vector<std::string> &output_names = op_.Output(name);
auto length = output_names.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL,
"Output(%s) should have only one value, "
"but it have %d now",
name, length);
return block_.HasVarRecursive(output_names[0]);
}
bool CompileTimeInferShapeContext::HasInputs(const std::string &name) const {
const std::vector<std::string> &input_names = op_.Input(name);
if (input_names.empty()) {
return false;
}
for (auto &input : input_names) {
if (!block_.HasVarRecursive(input)) return false;
}
return true;
}
bool CompileTimeInferShapeContext::HasOutputs(const std::string &name) const {
const std::vector<std::string> &output_names = op_.Output(name);
if (output_names.empty()) {
return false;
}
for (auto &output : output_names) {
if (!block_.HasVarRecursive(output)) return false;
}
return true;
}
DDim CompileTimeInferShapeContext::GetInputDim(const std::string &name) const {
std::vector<DDim> ddims = GetInputsDim(name);
auto length = ddims.size();
PADDLE_ENFORCE_EQ(length, 1UL,
"Input(%s) should have 1 value, "
"but it has %d now",
name, length);
return ddims[0];
}
void CompileTimeInferShapeContext::SetOutputDim(const std::string &name,
const DDim &dim) {
SetOutputsDim(name, {dim});
}
AttrReader CompileTimeInferShapeContext::Attrs() const {
return AttrReader(op_.GetAttrMap());
}
const std::vector<std::string> &CompileTimeInferShapeContext::Inputs(
const std::string &name) const {
return op_.Input(name);
}
const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
const std::string &name) const {
return op_.Output(name);
}
DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto var = block_.FindVarRecursive(name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
return framework::make_ddim(var->Shape());
}
void CompileTimeInferShapeContext::SetDim(const std::string &name,
const DDim &dim) {
block_.FindVarRecursive(name)->SetShape(framework::vectorize(dim));
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -107,6 +107,8 @@ class OpDescBind {
void InferVarType(BlockDescBind *block) const; void InferVarType(BlockDescBind *block) const;
void MarkAsTarget() { desc_.set_is_target(true); }
void Flush(); void Flush();
private: private:

@ -29,6 +29,7 @@ limitations under the License. */
#include "paddle/framework/op_desc.h" #include "paddle/framework/op_desc.h"
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include "paddle/framework/scope.h" #include "paddle/framework/scope.h"
#include "paddle/framework/shape_inference.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
@ -161,6 +162,10 @@ class OpKernelRegistrar : public Registrar {
REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \ REGISTER_OPERATOR(op_type, op_class, _GradOpDescMaker_##grad_op_type##_, \
op_maker_class); op_maker_class);
#define REGISTER_OP_WITH_KERNEL(op_type, ...) \
REGISTER_OPERATOR(op_type, ::paddle::framework::OperatorWithKernel, \
##__VA_ARGS__)
#define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \ #define REGISTER_OP_WITHOUT_GRADIENT(op_type, op_class, op_maker_class) \
REGISTER_OPERATOR(op_type, op_class, op_maker_class) REGISTER_OPERATOR(op_type, op_class, op_maker_class)
@ -223,6 +228,10 @@ class OpKernelRegistrar : public Registrar {
USE_OP_ITSELF(op_type); \ USE_OP_ITSELF(op_type); \
USE_OP_DEVICE_KERNEL(op_type, CPU); USE_OP_DEVICE_KERNEL(op_type, CPU);
#define USE_GPU_ONLY_OP(op_type) \
USE_OP_ITSELF(op_type); \
USE_OP_DEVICE_KERNEL(op_type, GPU)
#define USE_OP(op_type) \ #define USE_OP(op_type) \
USE_OP_ITSELF(op_type); \ USE_OP_ITSELF(op_type); \
USE_OP_KERNEL(op_type) USE_OP_KERNEL(op_type)

@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/framework/operator.h" #include "paddle/framework/operator.h"
#include <algorithm> #include <algorithm>
#include <atomic> #include <atomic>
#include "paddle/framework/shape_inference.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
@ -273,5 +274,137 @@ bool OpSupportGPU(const std::string& op_type) {
return false; return false;
} }
class RuntimeInferShapeContext : public InferShapeContext {
public:
RuntimeInferShapeContext(const OperatorBase& op, const Scope& scope)
: op_(op), scope_(scope) {}
bool HasInput(const std::string& name) const override {
auto& ins = Inputs(name);
size_t length = ins.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Input %s should have more than one inputs",
name);
auto ipt = ins[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
bool HasOutput(const std::string& name) const override {
auto& outs = Outputs(name);
size_t length = outs.size();
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Output %s should have more than one inputs",
name);
auto ipt = outs[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
}
bool HasInputs(const std::string& name) const override {
auto inputs = op_.Inputs(name);
if (inputs.empty()) {
return false;
}
for (auto& input : inputs) {
if (scope_.FindVar(input) == nullptr) {
return false;
}
}
return true;
}
bool HasOutputs(const std::string& name) const override {
auto outputs = op_.Outputs(name);
if (outputs.empty()) {
return false;
}
for (auto& output : outputs) {
if (scope_.FindVar(output) == nullptr) {
return false;
}
}
return true;
}
DDim GetInputDim(const std::string& name) const override {
return GetDim(op_.Input(name));
}
void SetOutputDim(const std::string& name, const DDim& dim) override {
SetDim(op_.Output(name), dim);
}
AttrReader Attrs() const override { return AttrReader(op_.Attrs()); }
const std::vector<std::string>& Inputs(
const std::string& name) const override {
return op_.Inputs(name);
}
const std::vector<std::string>& Outputs(
const std::string& name) const override {
return op_.Outputs(name);
}
private:
DDim GetDim(const std::string& name) const override {
Variable* var = scope_.FindVar(name);
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>().dims();
} else if (var->IsType<SelectedRows>()) {
return var->Get<SelectedRows>().GetCompleteDims();
} else {
PADDLE_THROW("Variable type must be LoDTensor/SelectedRows.");
}
}
void SetDim(const std::string& name, const DDim& dim) override {
Variable* var = scope_.FindVar(name);
if (var->IsType<LoDTensor>()) {
var->GetMutable<LoDTensor>()->Resize(dim);
} else if (var->IsType<SelectedRows>()) {
var->GetMutable<SelectedRows>()->set_height(dim[0]);
} else {
PADDLE_THROW("Variable type must be LoDTensor/SelectedRows.");
}
}
const OperatorBase& op_;
const Scope& scope_;
};
void OperatorWithKernel::Run(const Scope& scope,
const platform::DeviceContext& dev_ctx) const {
VLOG(3) << "Running operator " << this->Type();
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);
ExecutionContext ctx(*this, scope, dev_ctx);
// check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_);
if (kernels_iter == all_op_kernels.end()) {
PADDLE_THROW(
"There are no kernels which are registered in the %s operator.", type_);
}
// check if op[type] have kernel for kernel_key
OpKernelMap& kernels = kernels_iter->second;
auto kernel_key = OpKernelKey(IndicateDataType(ctx), dev_ctx);
auto kernel_iter = kernels.find(kernel_key);
if (kernel_iter == kernels.end()) {
PADDLE_THROW("The operator %s does not support %s", type_, kernel_key);
}
kernel_iter->second->Compute(ctx);
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

File diff suppressed because it is too large Load Diff

@ -49,6 +49,13 @@ ProgramDescBind::ProgramDescBind(const ProgramDescBind &o) {
} }
} }
ProgramDescBind::ProgramDescBind(const ProgramDesc &desc) {
desc_ = desc;
for (auto &block_desc : *desc_.mutable_blocks()) {
blocks_.emplace_back(new BlockDescBind(this, &block_desc));
}
}
ProgramDescBind::ProgramDescBind(const std::string &binary_str) { ProgramDescBind::ProgramDescBind(const std::string &binary_str) {
PADDLE_ENFORCE(desc_.ParseFromString(binary_str), PADDLE_ENFORCE(desc_.ParseFromString(binary_str),
"Fail to parse program_desc from binary string."); "Fail to parse program_desc from binary string.");

@ -29,6 +29,8 @@ class ProgramDescBind {
public: public:
ProgramDescBind(); ProgramDescBind();
explicit ProgramDescBind(const ProgramDesc &desc);
ProgramDescBind(const ProgramDescBind &o); ProgramDescBind(const ProgramDescBind &o);
explicit ProgramDescBind(const std::string &binary_str); explicit ProgramDescBind(const std::string &binary_str);

@ -46,7 +46,7 @@ bool IsTarget(const OpDesc& op_desc) {
return false; return false;
} }
void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) { void prune_impl(const ProgramDesc& input, ProgramDesc* output, int block_id) {
// TODO(tonyyang-svail): // TODO(tonyyang-svail):
// - will change to use multiple blocks for RNN op and Cond Op // - will change to use multiple blocks for RNN op and Cond Op
@ -91,8 +91,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
// we reverse the should_run vector // we reverse the should_run vector
std::reverse(should_run.begin(), should_run.end()); std::reverse(should_run.begin(), should_run.end());
output = input; *output = input;
auto* op_field = output.mutable_blocks(block_id)->mutable_ops(); auto* op_field = output->mutable_blocks(block_id)->mutable_ops();
op_field->Clear(); op_field->Clear();
for (size_t i = 0; i < should_run.size(); ++i) { for (size_t i = 0; i < should_run.size(); ++i) {
if (should_run[i]) { if (should_run[i]) {
@ -101,7 +101,8 @@ void prune_impl(const ProgramDesc& input, ProgramDesc& output, int block_id) {
} }
} }
void Prune(const ProgramDesc& input, ProgramDesc& output) { // TODO(fengjiayi): Prune() could be inplaced to avoid unnecessary copies
void Prune(const ProgramDesc& input, ProgramDesc* output) {
prune_impl(input, output, 0); prune_impl(input, output, 0);
} }

@ -20,7 +20,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void Prune(const ProgramDesc& input, ProgramDesc& output); void Prune(const ProgramDesc& input, ProgramDesc* output);
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle

@ -59,11 +59,11 @@ TEST(Prune, one_operator) {
f::ProgramDesc *pdesc = program.Proto(); f::ProgramDesc *pdesc = program.Proto();
f::ProgramDesc pruned; f::ProgramDesc pruned;
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 0);
pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(0)->set_is_target(true);
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 1);
} }
@ -81,7 +81,7 @@ TEST(Prune, forward) {
for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) { for (int i = 0; i < pdesc->blocks(0).ops_size(); ++i) {
f::ProgramDesc pruned; f::ProgramDesc pruned;
pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(i)->set_is_target(true);
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), i + 1);
} }
} }
@ -100,7 +100,7 @@ TEST(Prune, multi_input_op) {
pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(3)->set_is_target(true);
f::ProgramDesc pruned; f::ProgramDesc pruned;
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 4);
} }
@ -116,7 +116,7 @@ TEST(Prune, multi_output_op) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned; f::ProgramDesc pruned;
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 2);
} }
@ -133,6 +133,6 @@ TEST(Prune, multi_target) {
pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true); pdesc->mutable_blocks(0)->mutable_ops(2)->set_is_target(true);
f::ProgramDesc pruned; f::ProgramDesc pruned;
Prune(*pdesc, pruned); Prune(*pdesc, &pruned);
PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3); PADDLE_ENFORCE_EQ(pruned.blocks(0).ops_size(), 3);
} }

@ -16,6 +16,7 @@ limitations under the License. */
#include <memory> // for unique_ptr #include <memory> // for unique_ptr
#include <mutex> // for call_once #include <mutex> // for call_once
#include "glog/logging.h"
#include "paddle/string/printf.h" #include "paddle/string/printf.h"
namespace paddle { namespace paddle {
@ -23,7 +24,10 @@ namespace framework {
Scope::~Scope() { Scope::~Scope() {
DropKids(); DropKids();
for (auto& kv : vars_) delete kv.second; for (auto& kv : vars_) {
VLOG(3) << "Destroy variable " << kv.first;
delete kv.second;
}
} }
Scope& Scope::NewScope() const { Scope& Scope::NewScope() const {
@ -38,6 +42,7 @@ Variable* Scope::Var(const std::string& name) {
} }
Variable* v = new Variable(); Variable* v = new Variable();
vars_[name] = v; vars_[name] = v;
VLOG(3) << "Create variable " << name << " on scope";
v->name_ = &(vars_.find(name)->first); v->name_ = &(vars_.find(name)->first);
return v; return v;
} }

@ -0,0 +1,54 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/framework/shape_inference.h"
namespace paddle {
namespace framework {
std::vector<framework::DDim> InferShapeContext::GetInputsDim(
const std::string &name) const {
const std::vector<std::string> &names = Inputs(name);
return GetDims(names);
}
void InferShapeContext::SetOutputsDim(
const std::string &name, const std::vector<framework::DDim> &dims) {
auto &names = Outputs(name);
SetDims(names, dims);
}
void InferShapeContext::ShareLoD(const std::string &in, const std::string &out,
size_t i, size_t j) const {}
std::vector<framework::DDim> InferShapeContext::GetDims(
const std::vector<std::string> &names) const {
std::vector<framework::DDim> ret;
ret.reserve(names.size());
std::transform(
names.begin(), names.end(), std::back_inserter(ret),
[this](const std::string &name) { return this->GetDim(name); });
return ret;
}
void InferShapeContext::SetDims(const std::vector<std::string> &names,
const std::vector<framework::DDim> &dims) {
size_t length = names.size();
PADDLE_ENFORCE_EQ(length, dims.size());
for (size_t i = 0; i < length; ++i) {
SetDim(names[i], dims[i]);
}
}
} // namespace framework
} // namespace paddle

@ -14,6 +14,7 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/framework/attribute.h"
#include "paddle/framework/ddim.h" #include "paddle/framework/ddim.h"
namespace paddle { namespace paddle {
@ -21,7 +22,7 @@ namespace framework {
class InferShapeContext { class InferShapeContext {
public: public:
virtual ~InferShapeContext() {} virtual ~InferShapeContext() = default;
virtual bool HasInput(const std::string &name) const = 0; virtual bool HasInput(const std::string &name) const = 0;
virtual bool HasOutput(const std::string &name) const = 0; virtual bool HasOutput(const std::string &name) const = 0;
@ -29,57 +30,32 @@ class InferShapeContext {
virtual bool HasOutputs(const std::string &name) const = 0; virtual bool HasOutputs(const std::string &name) const = 0;
virtual framework::DDim GetInputDim(const std::string &name) const = 0; virtual framework::DDim GetInputDim(const std::string &name) const = 0;
std::vector<framework::DDim> GetInputsDim(const std::string &name) const {
const std::vector<std::string> &names = Inputs(name); std::vector<framework::DDim> GetInputsDim(const std::string &name) const;
return GetDims(names);
}
virtual void SetInputDim(const std::string &name,
const framework::DDim &dim) = 0;
void SetInputsDim(const std::string &name,
const std::vector<framework::DDim> &dims) {
auto &names = Inputs(name);
SetDims(names, dims);
}
virtual framework::DDim GetOutputDim(const std::string &name) const = 0;
std::vector<framework::DDim> GetOutputsDim(const std::string &name) const {
const std::vector<std::string> &names = Outputs(name);
return GetDims(names);
}
virtual void SetOutputDim(const std::string &name, const DDim &dim) = 0; virtual void SetOutputDim(const std::string &name, const DDim &dim) = 0;
void SetOutputsDim(const std::string &name, void SetOutputsDim(const std::string &name,
const std::vector<framework::DDim> &dims) { const std::vector<framework::DDim> &dims);
auto &names = Outputs(name);
SetDims(names, dims);
}
virtual AttrReader Attrs() const = 0; virtual AttrReader Attrs() const = 0;
virtual const std::vector<std::string> &Inputs( virtual const std::vector<std::string> &Inputs(
const std::string &name) const = 0; const std::string &name) const = 0;
virtual const std::vector<std::string> &Outputs( virtual const std::vector<std::string> &Outputs(
const std::string &name) const = 0; const std::string &name) const = 0;
// TODO(qiao) implement this function // TODO(qiao) implement this function
void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, void ShareLoD(const std::string &in, const std::string &out, size_t i = 0,
size_t j = 0) const {} size_t j = 0) const;
protected: protected:
virtual framework::DDim GetDim(const std::string &name) const = 0; virtual framework::DDim GetDim(const std::string &name) const = 0;
virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0; virtual void SetDim(const std::string &name, const framework::DDim &dim) = 0;
std::vector<framework::DDim> GetDims( std::vector<framework::DDim> GetDims(
const std::vector<std::string> &names) const { const std::vector<std::string> &names) const;
std::vector<framework::DDim> ret;
ret.reserve(names.size());
std::transform(
names.begin(), names.end(), std::back_inserter(ret),
[this](const std::string &name) { return this->GetDim(name); });
return ret;
}
void SetDims(const std::vector<std::string> &names, void SetDims(const std::vector<std::string> &names,
const std::vector<framework::DDim> &dims) { const std::vector<framework::DDim> &dims);
size_t length = names.size();
PADDLE_ENFORCE_EQ(length, dims.size());
for (size_t i = 0; i < length; ++i) {
SetDim(names[i], dims[i]);
}
}
}; };
} // namespace framework } // namespace framework

@ -126,11 +126,16 @@ class Tensor {
inline Tensor Slice(const int& begin_idx, const int& end_idx) const; inline Tensor Slice(const int& begin_idx, const int& end_idx) const;
platform::Place place() const { platform::Place place() const {
PADDLE_ENFORCE_NOT_NULL(holder_, "Tensor get place() must contains holder"); PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::place() is called.");
return holder_->place(); return holder_->place();
} }
std::type_index type() const { return holder_->type(); } std::type_index type() const {
PADDLE_ENFORCE_NOT_NULL(
holder_, "Tensor not initialized yet when Tensor::type() is called.");
return holder_->type();
}
size_t memory_size() const; size_t memory_size() const;

@ -1,6 +1,6 @@
add_subdirectory(detail) add_subdirectory(detail)
cc_library(memory SRCS memory.cc) cc_library(memory SRCS memory.cc DEPS place)
cc_library(memcpy SRCS memcpy.cc) cc_library(memcpy SRCS memcpy.cc)
cc_library(paddle_memory cc_library(paddle_memory

@ -13,6 +13,7 @@
limitations under the License. */ limitations under the License. */
#include "paddle/memory/detail/meta_cache.h" #include "paddle/memory/detail/meta_cache.h"
#include "glog/logging.h"
#include "paddle/memory/detail/memory_block.h" #include "paddle/memory/detail/memory_block.h"
#include "paddle/platform/assert.h" #include "paddle/platform/assert.h"
@ -28,7 +29,9 @@ Metadata MetadataCache::load(const MemoryBlock* block) {
PADDLE_ASSERT(existing_metadata->second.check_guards()); PADDLE_ASSERT(existing_metadata->second.check_guards());
return existing_metadata->second; return existing_metadata->second;
} else { } else {
PADDLE_ASSERT(reinterpret_cast<const Metadata*>(block)->check_guards()); auto* meta = reinterpret_cast<const Metadata*>(block);
VLOG(3) << "Load MetaData type=" << meta->type;
PADDLE_ASSERT(meta->check_guards());
return *reinterpret_cast<const Metadata*>(block); return *reinterpret_cast<const Metadata*>(block);
} }
} }

@ -39,11 +39,15 @@ BuddyAllocator* GetCPUBuddyAllocator() {
template <> template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) { void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
return GetCPUBuddyAllocator()->Alloc(size); VLOG(3) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size);
VLOG(3) << " pointer=" << p;
return p;
} }
template <> template <>
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) { void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
VLOG(3) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p); GetCPUBuddyAllocator()->Free(p);
} }

@ -97,6 +97,13 @@ function(op_library TARGET)
file(APPEND ${pybind_file} "USE_OP(sigmoid);\n") file(APPEND ${pybind_file} "USE_OP(sigmoid);\n")
endif() endif()
# nccl_op contains several operators
if ("${TARGET}" STREQUAL "nccl_op")
set(pybind_flag 1)
# It's enough to just adding one operator to pybind
file(APPEND ${pybind_file} "USE_GPU_ONLY_OP(ncclAllReduce);\n")
endif()
# reduce_op contains several operators # reduce_op contains several operators
if ("${TARGET}" STREQUAL "reduce_op") if ("${TARGET}" STREQUAL "reduce_op")
set(pybind_flag 1) set(pybind_flag 1)
@ -128,6 +135,7 @@ function(op_library TARGET)
endfunction() endfunction()
add_subdirectory(math) add_subdirectory(math)
add_subdirectory(nccl)
set(DEPS_OPS set(DEPS_OPS
recurrent_op recurrent_op
@ -139,6 +147,7 @@ set(DEPS_OPS
pool_with_index_op pool_with_index_op
lstm_op lstm_op
conv_transpose_op conv_transpose_op
nccl_op
sequence_conv_op sequence_conv_op
lstm_op) lstm_op)
@ -151,6 +160,9 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(sum_op DEPS net_op selected_rows_functor) op_library(sum_op DEPS net_op selected_rows_functor)
op_library(pool_op DEPS pooling) op_library(pool_op DEPS pooling)
op_library(pool_with_index_op DEPS pooling) op_library(pool_with_index_op DEPS pooling)
if(WITH_GPU)
op_library(nccl_op DEPS nccl_common)
endif()
op_library(sequence_conv_op DEPS context_project) op_library(sequence_conv_op DEPS context_project)
op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(lstm_op DEPS sequence2batch lstm_compute)
op_library(conv_transpose_op DEPS vol2col) op_library(conv_transpose_op DEPS vol2col)
@ -167,4 +179,8 @@ cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory) cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc DEPS dynamic_recurrent_op recurrent_op tensor_array) cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc DEPS dynamic_recurrent_op recurrent_op tensor_array)
if(WITH_GPU)
nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context)
endif()
cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op)

@ -70,7 +70,5 @@ information, or not. But the output only shares the LoD with input `Inference`.
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(accuracy, ops::AccuracyOp, ops::AccuracyOpMaker); REGISTER_OP_WITHOUT_GRADIENT(accuracy, ops::AccuracyOp, ops::AccuracyOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
accuracy, ops::AccuracyKernel<paddle::platform::CPUPlace, float>, accuracy, ops::AccuracyKernel<paddle::platform::CPUPlace, int>,
ops::AccuracyKernel<paddle::platform::CPUPlace, int>,
ops::AccuracyKernel<paddle::platform::CPUPlace, double>,
ops::AccuracyKernel<paddle::platform::CPUPlace, int64_t>); ops::AccuracyKernel<paddle::platform::CPUPlace, int64_t>);

@ -81,7 +81,5 @@ class AccuracyOpCUDAKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<float>, REGISTER_OP_GPU_KERNEL(accuracy, paddle::operators::AccuracyOpCUDAKernel<int>,
paddle::operators::AccuracyOpCUDAKernel<double>,
paddle::operators::AccuracyOpCUDAKernel<int>,
paddle::operators::AccuracyOpCUDAKernel<int64_t>); paddle::operators::AccuracyOpCUDAKernel<int64_t>);

@ -18,6 +18,7 @@ namespace paddle {
namespace operators { namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
template <typename T, int MajorType = Eigen::RowMajor, template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex> typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
@ -64,6 +65,9 @@ class BatchNormOp : public framework::OperatorWithKernel {
(tensor_format == TensorFormat::NCHW ? x_dims[1] (tensor_format == TensorFormat::NCHW ? x_dims[1]
: x_dims[x_dims.size() - 1]); : x_dims[x_dims.size() - 1]);
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5,
"Input x must have 3 to 5 dimensions.");
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale").size(), 1UL);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Scale")[0], C);
PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL); PADDLE_ENFORCE_EQ(ctx->GetInputDim("Bias").size(), 1UL);
@ -108,10 +112,12 @@ class BatchNormOpMaker : public framework::OpProtoAndCheckerMaker {
"Store the global Variance when training"); "Store the global Variance when training");
AddOutput("SavedMean", AddOutput("SavedMean",
"Mean of the current mini batch, " "Mean of the current mini batch, "
"will apply to output when training"); "will apply to output when training")
.AsIntermediate();
AddOutput("SavedVariance", AddOutput("SavedVariance",
"Variance of the current mini batch, " "Variance of the current mini batch, "
"will apply to output when training"); "will apply to output when training")
.AsIntermediate();
AddComment(R"DOC( AddComment(R"DOC(
https://arxiv.org/pdf/1502.03167.pdf https://arxiv.org/pdf/1502.03167.pdf
@ -135,7 +141,6 @@ class BatchNormKernel<platform::CPUPlace, T> : public framework::OpKernel<T> {
const auto *x = ctx.Input<Tensor>("X"); const auto *x = ctx.Input<Tensor>("X");
const auto &x_dims = x->dims(); const auto &x_dims = x->dims();
PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5, PADDLE_ENFORCE(x_dims.size() >= 3 && x_dims.size() <= 5,
"The Input dim size should be between 3 and 5"); "The Input dim size should be between 3 and 5");
const int N = x_dims[0]; const int N = x_dims[0];
@ -289,6 +294,25 @@ class BatchNormGradOp : public framework::OperatorWithKernel {
ctx->SetOutputDim(framework::GradVarName("Scale"), {C}); ctx->SetOutputDim(framework::GradVarName("Scale"), {C});
ctx->SetOutputDim(framework::GradVarName("Bias"), {C}); ctx->SetOutputDim(framework::GradVarName("Bias"), {C});
} }
framework::DataType IndicateDataType(
const framework::ExecutionContext &ctx) const override {
VLOG(3) << "IndicateDataType " << this->Type();
const auto *var = ctx.InputVar(framework::GradVarName("Y"));
if (var == nullptr) {
PADDLE_THROW("can't find Y@GRAD");
}
const Tensor *t = nullptr;
if (var->IsType<Tensor>()) {
t = &var->Get<Tensor>();
} else if (var->IsType<LoDTensor>()) {
t = &var->Get<LoDTensor>();
}
if (t == nullptr) {
PADDLE_THROW("can't find Y@GRAD");
}
return framework::ToDataType(t->type());
}
}; };
template <typename T> template <typename T>

@ -117,9 +117,6 @@ class BatchNormKernel<platform::GPUPlace, T> : public framework::OpKernel<T> {
math::SetConstant<platform::GPUPlace, T> functor; math::SetConstant<platform::GPUPlace, T> functor;
functor(ctx.device_context(), saved_mean, 0); functor(ctx.device_context(), saved_mean, 0);
functor(ctx.device_context(), saved_variance, 0); functor(ctx.device_context(), saved_variance, 0);
// FIXME(qiao) should not set zero self
functor(ctx.device_context(), mean_out, 0);
functor(ctx.device_context(), variance_out, 0);
auto handle = ctx.cuda_device_context().cudnn_handle(); auto handle = ctx.cuda_device_context().cudnn_handle();
@ -211,8 +208,15 @@ class BatchNormGradKernel<platform::GPUPlace, T>
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
std::vector<int> dims = {N, C, H, W, D}; std::vector<int> dims;
std::vector<int> strides = {H * W * C * D, 1, W * D * C, D * C, C}; std::vector<int> strides;
if (tensor_format == TensorFormat::NCHW) {
dims = {N, C, H, W, D};
strides = {C * H * W * D, H * W * D, W * D, D, 1};
} else {
dims = {N, C, H, W, D};
strides = {H * W * C * D, 1, W * D * C, D * C, C};
}
CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor( CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
data_desc_, CudnnDataType<T>::type, data_desc_, CudnnDataType<T>::type,
x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data())); x_dims.size() > 3 ? x_dims.size() : 4, dims.data(), strides.data()));

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save