Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into optimize_hs_op

, test=develop
revert-14398-imperative
JiabinYang 6 years ago
commit 8c75705984

@ -32,6 +32,8 @@ IF(NOT ${WITH_NGRAPH})
return()
ENDIF()
INCLUDE(GNUInstallDirs)
INCLUDE(ExternalProject)
SET(NGRAPH_PROJECT "extern_ngraph")
@ -40,10 +42,14 @@ SET(NGRAPH_GIT_TAG "f9fd9d4cc318dc59dd4b68448e7fbb5f67a28bd0")
SET(NGRAPH_SOURCES_DIR ${THIRD_PARTY_PATH}/ngraph)
SET(NGRAPH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/ngraph)
SET(NGRAPH_INC_DIR ${NGRAPH_INSTALL_DIR}/include)
SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
SET(NGRAPH_SHARED_LIB_NAME libngraph.so.${NGRAPH_VERSION})
SET(NGRAPH_CPU_LIB_NAME libcpu_backend.so)
SET(NGRAPH_TBB_LIB_NAME libtbb.so.2)
SET(NGRAPH_GIT_REPO "https://github.com/NervanaSystems/ngraph.git")
SET(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
SET(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
SET(NGRAPH_TBB_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME})
ExternalProject_Add(
${NGRAPH_PROJECT}
@ -63,18 +69,6 @@ ExternalProject_Add(
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/lib
)
if(UNIX AND NOT APPLE)
include(GNUInstallDirs)
SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR})
else()
SET(NGRAPH_LIB_DIR ${NGRAPH_INSTALL_DIR}/lib)
endif()
MESSAGE(STATUS "nGraph lib will be installed at: ${NGRAPH_LIB_DIR}")
SET(NGRAPH_SHARED_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_SHARED_LIB_NAME})
SET(NGRAPH_CPU_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_CPU_LIB_NAME})
SET(NGRAPH_TBB_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME})
# Workaround for nGraph expecting mklml to be in mkldnn install directory.
ExternalProject_Add_Step(
${NGRAPH_PROJECT}

@ -129,6 +129,15 @@ if (WITH_MKLDNN)
)
endif ()
if (WITH_NGRAPH)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/ngraph")
copy(ngraph_lib
SRCS ${NGRAPH_INC_DIR} ${NGRAPH_LIB_DIR}
DSTS ${dst_dir} ${dst_dir}
DEPS ngraph
)
endif ()
if (NOT WIN32)
if (NOT MOBILE_INFERENCE AND NOT RPI)
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/snappy")

@ -182,7 +182,7 @@ paddle.fluid.layers.clip ArgSpec(args=['x', 'min', 'max', 'name'], varargs=None,
paddle.fluid.layers.clip_by_norm ArgSpec(args=['x', 'max_norm', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'ignore_index', 'name'], varargs=None, keywords=None, defaults=(-100, None))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_grid ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,))
@ -299,6 +299,7 @@ paddle.fluid.layers.generate_proposals ArgSpec(args=['scores', 'bbox_deltas', 'i
paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None))
paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None))
paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None))
paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1))
paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,))

@ -127,8 +127,9 @@ cc_library(version SRCS version.cc)
cc_test(version_test SRCS version_test.cc DEPS version)
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto)
if(NOT WIN32)
cc_library(ngraph_bridge SRCS ngraph_bridge.cc DEPS operator framework_proto ngraph)
cc_library(ngraph_operator SRCS ngraph_operator.cc DEPS ngraph_bridge operator op_info device_context tensor scope glog
shape_inference data_transform lod_tensor profiler)
endif(NOT WIN32)

@ -32,9 +32,7 @@ enum OpInfoFillType {
kOpProtoAndCheckerMaker = 1,
kGradOpDescMaker = 2,
kVarTypeInference = 3,
kShapeInference = 4,
kEstimateFlops = 5,
kUnknown = -1
kShapeInference = 4
};
template <typename T>
@ -50,10 +48,8 @@ struct OpInfoFillTypeID {
? kVarTypeInference
: (std::is_base_of<InferShapeBase, T>::value
? kShapeInference
: (std::is_base_of<EstimateFlopsBase,
T>::value
? kEstimateFlops
: kUnknown)))));
: static_cast<OpInfoFillType>(
-1)))));
}
};
@ -143,16 +139,6 @@ struct OpInfoFiller<T, kShapeInference> {
}
};
template <typename T>
struct OpInfoFiller<T, kEstimateFlops> {
void operator()(const char* op_tpe, OpInfo* info) const {
info->estimate_flops_ = [](InferShapeContext* ctx) {
T estimate_flops;
return estimate_flops(ctx);
};
}
};
} // namespace details
} // namespace framework

@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> IsTestPass::ApplyImpl(
for (const Node* n : graph->Nodes()) {
if (n->IsOp()) {
auto* op = n->Op();
if (op->HasAttr("is_test")) {
if (n->RuntimeHasAttr("is_test")) {
op->SetAttr("is_test", true);
} else if (std::find(begin(op_list), end(op_list), op->Type()) !=
end(op_list)) {

@ -104,9 +104,9 @@ TEST(IsTestPass, basic) {
auto* op = node->Op();
auto op_name = boost::get<std::string>(op->GetAttr("name"));
if (op_name == "conv3") {
ASSERT_FALSE(op->HasAttr("is_test"));
ASSERT_FALSE(node->RuntimeHasAttr("is_test"));
} else {
ASSERT_TRUE(op->HasAttr("is_test"));
ASSERT_TRUE(node->RuntimeHasAttr("is_test"));
EXPECT_TRUE(boost::get<bool>(op->GetAttr("is_test")));
}
}

@ -22,7 +22,7 @@ std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Aplies MKL-DNN placement strategy.";
for (const Node* n : graph->Nodes()) {
if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
if (n->IsOp() && n->RuntimeHasAttr("use_mkldnn")) {
n->Op()->SetAttr("use_mkldnn", true);
}
}

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_info.h"
namespace paddle {
namespace framework {
@ -24,10 +25,33 @@ constexpr char Node::kControlDepVarName[];
const char Node::kControlDepVarName[] = "__control_var";
#endif
std::unique_ptr<Node> CreateNodeForTest(const std::string& name,
std::unique_ptr<Node> CreateNodeForTest(const std::string &name,
Node::Type type) {
return std::unique_ptr<Node>(new Node(name, type));
}
bool Node::RuntimeHasAttr(const std::string &name) const {
if (Op()->HasAttr(name)) {
return true;
} else {
auto &op_info = OpInfoMap::Instance();
auto op_type = Op()->Type();
if (op_info.Has(op_type)) {
auto op_info_ptr = op_info.Get(op_type);
if (op_info_ptr.HasOpProtoAndChecker()) {
const proto::OpProto &proto = op_info_ptr.Proto();
for (int i = 0; i != proto.attrs_size(); ++i) {
const proto::OpProto::Attr &attr = proto.attrs(i);
if (attr.name() == name) {
return true;
}
}
}
}
}
return false;
}
} // namespace ir
} // namespace framework
} // namespace paddle

@ -108,6 +108,18 @@ class Node {
Name().find(ir::Node::kControlDepVarName) != std::string::npos;
}
// RuntimeHasAttr is different with HasAttr now.
// 1. For Op()->HasAttr(), it judges whether a stored program_desc_ has attr,
// thus, if stored program_desc_ are old which don't have an attr, a new
// library which adds the attr already will fail on this function.
// Details:
// https://github.com/PaddlePaddle/Paddle/pull/14608#issuecomment-442309087
// 2. For Op()->RuntimeHasAttr, it judges the attr in runtime to avoid above
// problem.
// TODO(luotao): Maybe we should enhance HasAttr later, instead of adding
// RuntimeHasAttr.
bool RuntimeHasAttr(const std::string& name) const;
std::vector<Node*> inputs;
std::vector<Node*> outputs;

@ -15,23 +15,105 @@ limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <functional>
#include <vector>
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h"
#include "ngraph/ngraph.hpp"
namespace paddle {
namespace framework {
static std::shared_ptr<ngraph::Node> GetNode(
const std::shared_ptr<OperatorBase>& op, const std::string prm,
const VariableNameMap& var_map,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto& var_names = var_map.at(prm);
PADDLE_ENFORCE_EQ(var_names.size(), 1,
"op %s prm %s expects one associated var", op->Type(), prm);
if (ngb_node_map->find(var_names[0]) != ngb_node_map->end()) {
return (*ngb_node_map)[var_names[0]];
} else {
return nullptr;
}
}
static std::shared_ptr<ngraph::Node> GetInputNode(
const std::shared_ptr<OperatorBase>& op, const std::string prm,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
return GetNode(op, prm, op->Inputs(), ngb_node_map);
}
static std::shared_ptr<ngraph::Node> GetOutputNode(
const std::shared_ptr<OperatorBase>& op, const std::string prm,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
return GetNode(op, prm, op->Outputs(), ngb_node_map);
}
static void SetOutputNode(
const std::shared_ptr<OperatorBase>& op, const std::string prm,
std::shared_ptr<ngraph::Node> node,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto& var_names = op->Outputs().at(prm);
if (var_names.size() == 1) {
(*ngb_node_map)[var_names[0]] = node;
} else if (var_names.size() == 0) {
(*ngb_node_map)[""] = node;
} else {
PADDLE_THROW("prm %s has more than 1 var_names.", prm);
}
}
static bool HasOutput(const std::shared_ptr<OperatorBase>& op,
const std::string prm) {
auto& outputs = op->Outputs();
if (outputs.find(prm) == outputs.end()) return false;
return outputs.at(prm).size() > 0;
}
template <typename T>
static void BuildBinaryNode(
const std::shared_ptr<OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto x = GetInputNode(op, "X", ngb_node_map);
auto y = GetInputNode(op, "Y", ngb_node_map);
auto out = std::make_shared<T>(x, y);
SetOutputNode(op, "Out", out, ngb_node_map);
}
template <typename T>
static void BuildUnaryNode(
const std::shared_ptr<OperatorBase>& op,
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map) {
auto input = GetInputNode(op, "X", ngb_node_map);
auto out = std::make_shared<T>(input);
SetOutputNode(op, "Out", out, ngb_node_map);
}
std::map<std::string,
std::function<void(const std::shared_ptr<OperatorBase>&,
std::shared_ptr<std::unordered_map<
std::string, std::shared_ptr<ngraph::Node>>>)>>
NgraphBridge::NG_NODE_MAP = {};
NgraphBridge::NG_NODE_MAP = {{"relu", BuildUnaryNode<ngraph::op::Relu>},
{"tanh", BuildUnaryNode<ngraph::op::Tanh>}};
void NgraphBridge::build_graph(const std::shared_ptr<OperatorBase>& op) {
void NgraphBridge::BuildNgNode(const std::shared_ptr<OperatorBase>& op) {
auto& op_type = op->Type();
NG_NODE_MAP[op_type](op, ngb_node_map);
NG_NODE_MAP[op_type](op, ngb_node_map_);
}
} // namespace framework

@ -20,16 +20,14 @@ limitations under the License. */
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/enforce.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/node.hpp"
namespace paddle {
namespace framework {
class OperatorBase;
class NgraphBridge {
public:
static std::map<
@ -43,14 +41,14 @@ class NgraphBridge {
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
var_node_map)
: ngb_node_map(var_node_map) {}
: ngb_node_map_(var_node_map) {}
void build_graph(const std::shared_ptr<OperatorBase>& op);
void BuildNgNode(const std::shared_ptr<OperatorBase>& op);
private:
std::shared_ptr<
std::unordered_map<std::string, std::shared_ptr<ngraph::Node>>>
ngb_node_map;
ngb_node_map_;
};
} // namespace framework

File diff suppressed because it is too large Load Diff

@ -17,24 +17,19 @@ limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#include <algorithm>
#include <atomic>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/variant.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/type/element_type.hpp"
namespace paddle {
namespace framework {

@ -31,12 +31,6 @@ class InferShapeBase {
virtual void operator()(InferShapeContext*) const = 0;
};
class EstimateFlopsBase {
public:
virtual ~EstimateFlopsBase() = default;
virtual size_t operator()(InferShapeContext*) const = 0;
};
struct OpInfo {
OpCreator creator_;
GradOpMakerFN grad_op_maker_;
@ -44,7 +38,6 @@ struct OpInfo {
OpAttrChecker* checker_{nullptr};
InferVarTypeFN infer_var_type_;
InferShapeFN infer_shape_;
EstimateFlopsFN estimate_flops_;
bool HasOpProtoAndChecker() const {
return proto_ != nullptr && checker_ != nullptr;

@ -695,6 +695,12 @@ static void CheckTensorNANOrInf(const std::string& name,
"Tensor %s contains NAN", name);
}
void OperatorWithKernel::RuntimeInferShape(const Scope& scope,
const platform::Place& place) const {
RuntimeInferShapeContext infer_shape_ctx(*this, scope);
this->InferShape(&infer_shape_ctx);
}
void OperatorWithKernel::RunImpl(const Scope& scope,
const platform::Place& place) const {
RuntimeInferShapeContext infer_shape_ctx(*this, scope);

@ -128,6 +128,8 @@ class OperatorBase {
virtual std::vector<std::string> OutputVars(bool has_intermediate) const;
void SetIsCalledByExecutor(bool x) { run_by_executor_ = x; }
virtual void RuntimeInferShape(const Scope& scope,
const platform::Place& place) const {}
protected:
std::string type_;
@ -348,6 +350,9 @@ class OperatorWithKernel : public OperatorBase {
OpInfoMap::Instance().Get(Type()).infer_shape_(ctx);
}
void RuntimeInferShape(const Scope& scope,
const platform::Place& place) const override;
protected:
virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const;
virtual OpKernelType GetKernelTypeForVar(

@ -54,7 +54,5 @@ using InferVarTypeFN =
using InferShapeFN = std::function<void(InferShapeContext*)>;
using EstimateFlopsFN = std::function<void(InferShapeContext*)>;
} // namespace framework
} // namespace paddle

@ -79,6 +79,16 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories("${PADDLE_LIB}/third_party/install/xxhash/lib")
link_directories("${PADDLE_LIB}/paddle/lib")
if (NOT WIN32)
set(NGRAPH_PATH "${PADDLE_LIB}/third_party/install/ngraph")
if(EXISTS ${NGRAPH_PATH})
include(GNUInstallDirs)
include_directories("${NGRAPH_PATH}/include")
link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
if(WITH_MKL)
@ -106,7 +116,7 @@ endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-lrt -ldl -lpthread")
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
${MATH_LIB} ${MKLDNN_LIB} ${NGRAPH_LIB}
glog gflags protobuf snappystream snappy z xxhash
${EXTERNAL_LIB})
else()

@ -36,13 +36,11 @@ class SequenceMaskOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) must exist");
PADDLE_ENFORCE(ctx->HasOutput("Y"), "Output(Y) must exist");
auto maxlen = ctx->Attrs().Get<int>("maxlen");
if (maxlen > 0) { // We can only infershape when maxlen > 0
int maxlen = ctx->Attrs().Get<int>("maxlen");
auto dim = framework::vectorize2int(ctx->GetInputDim("X"));
dim.push_back(maxlen);
dim.push_back(maxlen > 0 ? maxlen : -1);
ctx->SetOutputDim("Y", framework::make_ddim(dim));
}
}
};
class SequenceMaskOpMaker : public framework::OpProtoAndCheckerMaker {

@ -18,6 +18,7 @@ namespace paddle {
namespace operators {
using framework::Tensor;
const int kIgnoreIndex = -100;
class SigmoidCrossEntropyWithLogitsOp : public framework::OperatorWithKernel {
public:
@ -100,6 +101,11 @@ class SigmoidCrossEntropyWithLogitsOpMaker
AddOutput("Out",
"(Tensor, default Tensor<float>), a 2-D tensor with shape N x D "
" of elementwise logistic losses.");
AddAttr<int>("ignore_index",
"(int, default kIgnoreIndex), Specifies a target value that "
"is ignored and"
"does not contribute to the input gradient.")
.SetDefault(kIgnoreIndex);
AddComment(R"DOC(
SigmoidCrossEntropyWithLogits Operator.

@ -15,33 +15,72 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "paddle/legacy/utils/Logging.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
template <typename T>
struct SigmoidCrossEntropyWithLogitsForward {
HOSTDEVICE SigmoidCrossEntropyWithLogitsForward(const int &ignore_index)
: ignore_index(ignore_index) {}
HOSTDEVICE T operator()(const T &x, const T &label) const {
if (static_cast<int>(label) == ignore_index) {
return static_cast<T>(0.);
}
T term1 = (x > 0) ? x : 0;
T term2 = x * label;
T term3 = std::log(static_cast<T>(1) + std::exp(-(std::abs(x))));
return term1 - term2 + term3;
}
int ignore_index;
};
template <typename T>
struct SigmoidCrossEntropyWithLogitsBackward {
HOSTDEVICE SigmoidCrossEntropyWithLogitsBackward(const int &ignore_index)
: ignore_index(ignore_index) {}
HOSTDEVICE T operator()(const T &x, const T &label) const {
if (static_cast<int>(label) == ignore_index) {
return static_cast<T>(0.);
}
T simoid_x = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-x));
return simoid_x - label;
}
int ignore_index;
};
// Out = max(X, 0) - X * Labels + log(1 + exp(-abs(X)))
template <typename DeviceContext, typename T>
class SigmoidCrossEntropyWithLogitsKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const framework::Tensor *X = context.Input<framework::Tensor>("X");
const framework::Tensor *Labels = context.Input<framework::Tensor>("Label");
framework::Tensor *Out = context.Output<framework::Tensor>("Out");
const Tensor *X = context.Input<Tensor>("X");
const Tensor *Labels = context.Input<Tensor>("Label");
Tensor *Out = context.Output<Tensor>("Out");
Out->mutable_data<T>(context.GetPlace());
int ignore_index = context.Attr<int>("ignore_index");
auto x = framework::EigenVector<T>::Flatten(*X);
auto labels = framework::EigenVector<T>::Flatten(*Labels);
auto out = framework::EigenVector<T>::Flatten(*Out);
auto x = EigenVector<T>::Flatten(*X);
auto labels = EigenVector<T>::Flatten(*Labels);
auto out = EigenVector<T>::Flatten(*Out);
auto &place = *context.device_context<DeviceContext>().eigen_device();
// term1 = max(x, 0)
auto term1 = x.cwiseMax(static_cast<T>(0));
// term2 = x * labels
auto term2 = x * labels;
// term3 = log(1 + exp(-abs(x)))
auto term3 = (static_cast<T>(1) + (-(x.abs())).exp()).log();
out.device(place) = term1 - term2 + term3;
out.device(place) = x.binaryExpr(
labels, SigmoidCrossEntropyWithLogitsForward<T>(ignore_index));
}
};
@ -50,23 +89,23 @@ template <typename DeviceContext, typename T>
class SigmoidCrossEntropyWithLogitsGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
const framework::Tensor *X = context.Input<framework::Tensor>("X");
const framework::Tensor *Labels = context.Input<framework::Tensor>("Label");
const framework::Tensor *dOut =
context.Input<framework::Tensor>(framework::GradVarName("Out"));
framework::Tensor *dX =
context.Output<framework::Tensor>(framework::GradVarName("X"));
const Tensor *X = context.Input<Tensor>("X");
const Tensor *Labels = context.Input<Tensor>("Label");
const Tensor *dOut = context.Input<Tensor>(framework::GradVarName("Out"));
Tensor *dX = context.Output<Tensor>(framework::GradVarName("X"));
dX->mutable_data<T>(context.GetPlace());
auto x = framework::EigenVector<T>::Flatten(*X);
auto labels = framework::EigenVector<T>::Flatten(*Labels);
auto dout = framework::EigenVector<T>::Flatten(*dOut);
auto dx = framework::EigenVector<T>::Flatten(*dX);
auto ignore_index = context.Attr<int>("ignore_index");
auto x = EigenVector<T>::Flatten(*X);
auto labels = EigenVector<T>::Flatten(*Labels);
auto dout = EigenVector<T>::Flatten(*dOut);
auto dx = EigenVector<T>::Flatten(*dX);
auto &place =
*context.template device_context<DeviceContext>().eigen_device();
auto sigmoid_x = static_cast<T>(1) / (static_cast<T>(1) + (-x).exp());
dx.device(place) = dout * (sigmoid_x - labels);
auto diff = x.binaryExpr(labels, SigmoidCrossEntropyWithLogitsBackward<T>(
static_cast<int>(ignore_index)));
dx.device(place) = dout * diff;
}
};

@ -0,0 +1,221 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/yolov3_loss_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
using framework::Tensor;
class Yolov3LossOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of Yolov3LossOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("GTBox"),
"Input(GTBox) of Yolov3LossOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("GTLabel"),
"Input(GTLabel) of Yolov3LossOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Loss"),
"Output(Loss) of Yolov3LossOp should not be null.");
auto dim_x = ctx->GetInputDim("X");
auto dim_gtbox = ctx->GetInputDim("GTBox");
auto dim_gtlabel = ctx->GetInputDim("GTLabel");
auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
auto class_num = ctx->Attrs().Get<int>("class_num");
PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3],
"Input(X) dim[3] and dim[4] should be euqal.");
PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num),
"Input(X) dim[1] should be equal to (anchor_number * (5 "
"+ class_num)).");
PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3,
"Input(GTBox) should be a 3-D tensor");
PADDLE_ENFORCE_EQ(dim_gtbox[2], 4, "Input(GTBox) dim[2] should be 5");
PADDLE_ENFORCE_EQ(dim_gtlabel.size(), 2,
"Input(GTBox) should be a 2-D tensor");
PADDLE_ENFORCE_EQ(dim_gtlabel[0], dim_gtbox[0],
"Input(GTBox) and Input(GTLabel) dim[0] should be same");
PADDLE_ENFORCE_EQ(dim_gtlabel[1], dim_gtbox[1],
"Input(GTBox) and Input(GTLabel) dim[1] should be same");
PADDLE_ENFORCE_GT(anchors.size(), 0,
"Attr(anchors) length should be greater then 0.");
PADDLE_ENFORCE_EQ(anchors.size() % 2, 0,
"Attr(anchors) length should be even integer.");
PADDLE_ENFORCE_GT(class_num, 0,
"Attr(class_num) should be an integer greater then 0.");
std::vector<int64_t> dim_out({1});
ctx->SetOutputDim("Loss", framework::make_ddim(dim_out));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()),
platform::CPUPlace());
}
};
class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X",
"The input tensor of YOLO v3 loss operator, "
"This is a 4-D tensor with shape of [N, C, H, W]."
"H and W should be same, and the second dimention(C) stores"
"box locations, confidence score and classification one-hot"
"key of each anchor box");
AddInput("GTBox",
"The input tensor of ground truth boxes, "
"This is a 3-D tensor with shape of [N, max_box_num, 5], "
"max_box_num is the max number of boxes in each image, "
"In the third dimention, stores x, y, w, h coordinates, "
"x, y is the center cordinate of boxes and w, h is the "
"width and height and x, y, w, h should be divided by "
"input image height to scale to [0, 1].");
AddInput("GTLabel",
"The input tensor of ground truth label, "
"This is a 2-D tensor with shape of [N, max_box_num], "
"and each element shoudl be an integer to indicate the "
"box class id.");
AddOutput("Loss",
"The output yolov3 loss tensor, "
"This is a 1-D tensor with shape of [1]");
AddAttr<int>("class_num", "The number of classes to predict.");
AddAttr<std::vector<int>>("anchors",
"The anchor width and height, "
"it will be parsed pair by pair.");
AddAttr<float>("ignore_thresh",
"The ignore threshold to ignore confidence loss.");
AddAttr<float>("loss_weight_xy", "The weight of x, y location loss.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_wh", "The weight of w, h location loss.")
.SetDefault(1.0);
AddAttr<float>(
"loss_weight_conf_target",
"The weight of confidence score loss in locations with target object.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_conf_notarget",
"The weight of confidence score loss in locations without "
"target object.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_class", "The weight of classification loss.")
.SetDefault(1.0);
AddComment(R"DOC(
This operator generate yolov3 loss by given predict result and ground
truth boxes.
The output of previous network is in shape [N, C, H, W], while H and W
should be the same, specify the grid size, each grid point predict given
number boxes, this given number is specified by anchors, it should be
half anchors length, which following will be represented as S. In the
second dimention(the channel dimention), C should be S * (class_num + 5),
class_num is the box categoriy number of source dataset(such as coco),
so in the second dimention, stores 4 box location coordinates x, y, w, h
and confidence score of the box and class one-hot key of each anchor box.
While the 4 location coordinates if $$tx, ty, tw, th$$, the box predictions
correspnd to:
$$
b_x = \sigma(t_x) + c_x
b_y = \sigma(t_y) + c_y
b_w = p_w e^{t_w}
b_h = p_h e^{t_h}
$$
While $$c_x, c_y$$ is the left top corner of current grid and $$p_w, p_h$$
is specified by anchors.
As for confidence score, it is the logistic regression value of IoU between
anchor boxes and ground truth boxes, the score of the anchor box which has
the max IoU should be 1, and if the anchor box has IoU bigger then ignore
thresh, the confidence score loss of this anchor box will be ignored.
Therefore, the yolov3 loss consist of three major parts, box location loss,
confidence score loss, and classification loss. The MSE loss is used for
box location, and binary cross entropy loss is used for confidence score
loss and classification loss.
Final loss will be represented as follow.
$$
loss = \loss_weight_{xy} * loss_{xy} + \loss_weight_{wh} * loss_{wh}
+ \loss_weight_{conf_target} * loss_{conf_target}
+ \loss_weight_{conf_notarget} * loss_{conf_notarget}
+ \loss_weight_{class} * loss_{class}
$$
)DOC");
}
};
class Yolov3LossOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Loss")),
"Input(Loss@GRAD) should not be null");
auto dim_x = ctx->GetInputDim("X");
if (ctx->HasOutput(framework::GradVarName("X"))) {
ctx->SetOutputDim(framework::GradVarName("X"), dim_x);
}
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()),
platform::CPUPlace());
}
};
class Yolov3LossGradMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
auto* op = new framework::OpDesc();
op->SetType("yolov3_loss_grad");
op->SetInput("X", Input("X"));
op->SetInput("GTBox", Input("GTBox"));
op->SetInput("GTLabel", Input("GTLabel"));
op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
op->SetAttrMap(Attrs());
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetOutput(framework::GradVarName("GTBox"), {});
op->SetOutput(framework::GradVarName("GTLabel"), {});
return std::unique_ptr<framework::OpDesc>(op);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(yolov3_loss, ops::Yolov3LossOp, ops::Yolov3LossOpMaker,
ops::Yolov3LossGradMaker);
REGISTER_OPERATOR(yolov3_loss_grad, ops::Yolov3LossOpGrad);
REGISTER_OP_CPU_KERNEL(yolov3_loss, ops::Yolov3LossKernel<float>,
ops::Yolov3LossKernel<double>);
REGISTER_OP_CPU_KERNEL(yolov3_loss_grad, ops::Yolov3LossGradKernel<float>,
ops::Yolov3LossGradKernel<double>);

File diff suppressed because it is too large Load Diff

@ -442,6 +442,8 @@ EOF
make install -j 8
if [ "$1" == "cp27-cp27m" ]; then
pip install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
set -e
python -c "import paddle.fluid"
elif [ "$1" == "cp35-cp35m" ]; then
pip3.5 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
elif [ "$1" == "cp36-cp36m" ]; then

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save