revert revert 16144

test=develop
revert-15164-speed/enhance_cudnn
sneaxiy 6 years ago
parent ad5f0e6018
commit 5a92e4c097

File diff suppressed because it is too large Load Diff

@ -27,3 +27,13 @@ REGISTER_OP_CUDA_KERNEL(
cross_entropy_grad, ops::CrossEntropyGradientOpKernel<CUDACtx, float>,
ops::CrossEntropyGradientOpKernel<CUDACtx, double>,
ops::CrossEntropyGradientOpKernel<CUDACtx, plat::float16>);
// Register the CUDA kernels for the `cross_entropy2` op, instantiating
// CrossEntropyOpKernel2 for float, double and float16 element types.
REGISTER_OP_CUDA_KERNEL(cross_entropy2,
ops::CrossEntropyOpKernel2<CUDACtx, float>,
ops::CrossEntropyOpKernel2<CUDACtx, double>,
ops::CrossEntropyOpKernel2<CUDACtx, plat::float16>);
// Register the CUDA kernels for the `cross_entropy_grad2` op, instantiating
// CrossEntropyGradientOpKernel2 for the same three element types.
REGISTER_OP_CUDA_KERNEL(
cross_entropy_grad2, ops::CrossEntropyGradientOpKernel2<CUDACtx, float>,
ops::CrossEntropyGradientOpKernel2<CUDACtx, double>,
ops::CrossEntropyGradientOpKernel2<CUDACtx, plat::float16>);

@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/platform/for_range.h"
@ -137,5 +138,85 @@ class CrossEntropyGradientOpKernel : public framework::OpKernel<T> {
}
};
// Per-element functor that writes the gradient with respect to X for the
// hard-label cross entropy op.
//
// `idx` is a flat index into dx; it is split into a row (idx / feature_size)
// and a column (idx % feature_size). The element receives
// -dy[row] * exp(y[row]) when its column equals the row's label and that
// label is not `ignore_index`; every other element is set to zero.
template <typename T>
struct HardLabelCrossEntropyBackwardFunctor {
  HardLabelCrossEntropyBackwardFunctor(T* dx, const T* y, const T* dy,
                                       const int64_t* label,
                                       int64_t ignore_index,
                                       int64_t feature_size)
      : dx_(dx),
        y_(y),
        dy_(dy),
        label_(label),
        ignore_index_(ignore_index),
        feature_size_(feature_size) {}

  HOSTDEVICE void operator()(int64_t idx) const {
    const auto row = idx / feature_size_;
    const auto col = idx % feature_size_;
    const auto target = label_[row];

    // Only the labelled column of a non-ignored row carries gradient.
    const bool carries_grad = (target == col) && (target != ignore_index_);
    if (!carries_grad) {
      dx_[idx] = 0;
      return;
    }
    dx_[idx] = -dy_[row] * real_exp(y_[row]);
  }

  T* dx_;                 // output gradient buffer, indexed by flat idx
  const T* y_;            // forward output, one value per row
  const T* dy_;           // upstream gradient, one value per row
  const int64_t* label_;  // label per row
  int64_t ignore_index_;  // label value whose rows get zero gradient
  int64_t feature_size_;  // number of columns per row
};
// Forward kernel of the `cross_entropy2` op.
//
// Reshapes "X" and "Label" to matrices (flattening at the last axis of X),
// allocates "Y" on the execution place, and delegates to
// math::CrossEntropyFunctor with the soft-label flag set to false and the
// op's `ignore_index` attribute.
template <typename DeviceContext, typename T>
class CrossEntropyOpKernel2 : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* input = ctx.Input<Tensor>("X");
    // Both tensors are flattened at the last axis of X.
    const int flatten_axis = input->dims().size() - 1;
    auto x_mat = framework::ReshapeToMatrix(*input, flatten_axis);
    auto label_mat =
        framework::ReshapeToMatrix(*ctx.Input<Tensor>("Label"), flatten_axis);

    // The output buffer must exist before the functor writes into it.
    auto* out = ctx.Output<Tensor>("Y");
    out->mutable_data<T>(ctx.GetPlace());

    const auto ignore_index = ctx.Attr<int>("ignore_index");
    const auto& dev_ctx = ctx.template device_context<DeviceContext>();
    math::CrossEntropyFunctor<DeviceContext, T>()(dev_ctx, out, &x_mat,
                                                  &label_mat, false,
                                                  ignore_index);
  }
};
// Backward kernel of the `cross_entropy2` op.
//
// Reads "Y", the gradient of "Y" and "Label", allocates the gradient of "X",
// and runs HardLabelCrossEntropyBackwardFunctor once per element of that
// gradient tensor through platform::ForRange.
template <typename DeviceContext, typename T>
class CrossEntropyGradientOpKernel2 : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    auto* dx = ctx.Output<Tensor>(framework::GradVarName("X"));
    auto* y = ctx.Input<Tensor>("Y");
    auto* dy = ctx.Input<Tensor>(framework::GradVarName("Y"));
    auto* label = ctx.Input<Tensor>("Label");

    auto* p_dx = dx->mutable_data<T>(ctx.GetPlace());
    auto* p_y = y->data<T>();
    auto* p_dy = dy->data<T>();
    auto* p_label = label->data<int64_t>();

    int64_t ignore_index = ctx.Attr<int>("ignore_index");
    int rank = dx->dims().size();
    // Size of the last dimension; the functor uses it to split a flat index
    // into (row, column).
    int64_t feature_size = dx->dims()[rank - 1];
    // Iterate over every element of dx. The element count is taken directly
    // from the dims instead of (product / feature_size) * feature_size, which
    // yields the same value but divides by zero when the last dimension is 0.
    int64_t numel = framework::product(dx->dims());

    platform::ForRange<DeviceContext> for_range(
        ctx.template device_context<DeviceContext>(), numel);
    for_range(HardLabelCrossEntropyBackwardFunctor<T>(
        p_dx, p_y, p_dy, p_label, ignore_index, feature_size));
  }
};
} // namespace operators
} // namespace paddle

@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/expand_op.h"
#include <memory>
#include <vector>
namespace paddle {
@ -138,12 +139,28 @@ class ExpandGradOp : public framework::OperatorWithKernel {
}
};
// Builds the description of the `expand_grad` op.
//
// The gradient op takes the forward input "X" and the gradient of "Out" as
// inputs, produces the gradient of "X", and receives the attribute map
// returned by Attrs().
class ExpandGradOpDescMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

 protected:
  std::unique_ptr<framework::OpDesc> Apply() const override {
    std::unique_ptr<framework::OpDesc> grad_op(new framework::OpDesc());
    grad_op->SetType("expand_grad");

    // Inputs: forward X and the gradient flowing into Out.
    grad_op->SetInput("X", Input("X"));
    const auto out_grad_name = framework::GradVarName("Out");
    grad_op->SetInput(out_grad_name, OutputGrad("Out"));

    // Output: gradient with respect to X.
    const auto x_grad_name = framework::GradVarName("X");
    grad_op->SetOutput(x_grad_name, InputGrad("X"));

    grad_op->SetAttrMap(Attrs());
    return grad_op;
  }
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(expand, ops::ExpandOp, ops::ExpandOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::ExpandGradOpDescMaker);
REGISTER_OPERATOR(expand_grad, ops::ExpandGradOp);
REGISTER_OP_CPU_KERNEL(
expand, ops::ExpandKernel<paddle::platform::CPUDeviceContext, float>,

@ -0,0 +1,42 @@
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/hostdevice.h"
#include "math.h" // NOLINT
namespace paddle {
namespace operators {
// Overload set of exp/log that picks the math routine matching the argument
// type. The float16 overloads compute in single precision and convert the
// result back to float16.

inline HOSTDEVICE platform::float16 real_exp(platform::float16 x) {
  const float widened = static_cast<float>(x);
  return static_cast<platform::float16>(::expf(widened));
}

inline HOSTDEVICE float real_exp(float x) {
  return ::expf(x);
}

inline HOSTDEVICE double real_exp(double x) {
  return ::exp(x);
}

inline HOSTDEVICE platform::float16 real_log(platform::float16 x) {
  const float widened = static_cast<float>(x);
  return static_cast<platform::float16>(::logf(widened));
}

inline HOSTDEVICE float real_log(float x) {
  return ::logf(x);
}

inline HOSTDEVICE double real_log(double x) {
  return ::log(x);
}
} // namespace operators
} // namespace paddle

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/math/cross_entropy.h"
#include "paddle/fluid/platform/cuda_device_function.h"
#include "paddle/fluid/platform/cuda_primitives.h"
@ -20,17 +21,6 @@ namespace paddle {
namespace operators {
namespace math {
namespace {
// Device-side natural logarithm overloads, one per supported element type.
__device__ __forceinline__ float real_log(float x) {
  return logf(x);
}

__device__ __forceinline__ double real_log(double x) {
  return log(x);
}

// float16 is converted to float, passed through logf, and converted back.
__device__ __forceinline__ platform::float16 real_log(
    const platform::float16& val) {
  const float widened = static_cast<float>(val);
  return static_cast<platform::float16>(logf(widened));
}
template <typename T>
__global__ void CrossEntropyKernel(T* Y, const T* X, const int64_t* label,
const int N, const int D,
@ -61,7 +51,6 @@ __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
Y[blockIdx.x] = -val;
}
}
} // namespace
template <typename T>
class CrossEntropyFunctor<platform::CUDADeviceContext, T> {

@ -15,13 +15,12 @@ limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/platform/for_range.h"
namespace paddle {
namespace operators {
// Single-precision exponential.
static HOSTDEVICE float real_exp(float x) { return expf(x); }
// Double-precision exponential. The return type is double so the result of
// exp() is not silently narrowed to float.
static HOSTDEVICE double real_exp(double x) { return exp(x); }
template <typename T>
struct SeluFunctor {
SeluFunctor(const T* x_data_ptr, float alpha, float scale, T* y_data_ptr)

@ -14,6 +14,7 @@ limitations under the License. */
#include <algorithm>
#include <cub/cub.cuh> // NOLINT
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h"
namespace paddle {
@ -21,9 +22,6 @@ namespace operators {
using LoDTensor = framework::LoDTensor;
// Device-side exponential overloads: expf for float, exp for double.
__device__ __forceinline__ float real_exp(float x) {
  return expf(x);
}

__device__ __forceinline__ double real_exp(double x) {
  return exp(x);
}
template <typename T, int BlockDim>
using BlockReduce = cub::BlockReduce<T, BlockDim>;

@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "cub/cub.cuh"
#include "paddle/fluid/operators/math.h"
#include "paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/hostdevice.h"
@ -21,11 +22,6 @@ namespace operators {
using Tensor = framework::Tensor;
// Exponential overloads. The double overload returns double so the result of
// exp() is not silently narrowed to float.
static HOSTDEVICE float real_exp(float x) { return expf(x); }
static HOSTDEVICE double real_exp(double x) { return exp(x); }
// Natural-logarithm overloads. The double overload returns double so the
// result of log() is not silently narrowed to float.
static HOSTDEVICE float real_log(float x) { return logf(x); }
static HOSTDEVICE double real_log(double x) { return log(x); }
static constexpr int kNumCUDAThreads = 512;
static constexpr int kNumMaxinumNumBlocks = 4096;

@ -1432,6 +1432,8 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
"""
if not soft_label:
return cross_entropy2(input, label, ignore_index)
helper = LayerHelper('cross_entropy', **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
@ -1444,6 +1446,20 @@ def cross_entropy(input, label, soft_label=False, ignore_index=kIgnoreIndex):
return out
def cross_entropy2(input, label, ignore_index=kIgnoreIndex):
    """Append a ``cross_entropy2`` op and return its ``Y`` output variable.

    Args:
        input: variable fed to the op as ``X``.
        label: variable fed to the op as ``Label``.
        ignore_index: value passed to the op as the ``ignore_index`` attribute.

    Returns:
        The variable bound to the op's ``Y`` output. A second variable is
        created for the ``XShape`` output but is not returned.
    """
    # Keep this as the first statement: it forwards locals(), so no other
    # local names may exist yet.
    helper = LayerHelper('cross_entropy2', **locals())

    # Both outputs are created with the dtype of the input.
    out = helper.create_variable_for_type_inference(dtype=input.dtype)
    xshape = helper.create_variable_for_type_inference(dtype=input.dtype)

    op_inputs = {'X': [input], 'Label': [label]}
    op_outputs = {'Y': [out], 'XShape': [xshape]}
    op_attrs = {'ignore_index': ignore_index}
    helper.append_op(
        type='cross_entropy2',
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs)
    return out
def bpr_loss(input, label, name=None):
"""
Bayesian Personalized Ranking Loss Operator.

@ -524,8 +524,8 @@ class TestLocalLookupTable(TestDistLookupTableBase):
ops = [
'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
'cross_entropy', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad',
'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
'split_selected_rows', 'send', 'sequence_pool_grad',
'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',
@ -564,8 +564,8 @@ class TestDistLookupTable(TestDistLookupTableBase):
ops = [
'split_ids', 'prefetch', 'merge_ids', 'sequence_pool',
'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul',
'elementwise_add', 'cross_entropy', 'mean', 'fill_constant',
'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send',
'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant',
'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send',
'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad',
'lookup_table_grad', 'split_selected_rows', 'send',
'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad',
@ -612,8 +612,8 @@ class TestAsyncLocalLookupTable(TestDistLookupTableBase):
ops = [
'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
'cross_entropy', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad',
'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
'split_selected_rows', 'send', 'sequence_pool_grad',
'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',
@ -652,8 +652,8 @@ class TestAsyncDistLookupTable(TestDistLookupTableBase):
ops = [
'split_ids', 'prefetch', 'merge_ids', 'sequence_pool',
'sequence_pool', 'lookup_table', 'sequence_pool', 'concat', 'mul',
'elementwise_add', 'cross_entropy', 'mean', 'fill_constant',
'mean_grad', 'cross_entropy_grad', 'elementwise_add_grad', 'send',
'elementwise_add', 'cross_entropy2', 'mean', 'fill_constant',
'mean_grad', 'cross_entropy_grad2', 'elementwise_add_grad', 'send',
'mul_grad', 'send', 'concat_grad', 'sequence_pool_grad',
'lookup_table_grad', 'split_selected_rows', 'send',
'sequence_pool_grad', 'lookup_table_grad', 'sequence_pool_grad',
@ -841,8 +841,8 @@ class TestRemoteLookupTable(TestDistLookupTableBase):
ops = [
'lookup_table', 'sequence_pool', 'lookup_table', 'sequence_pool',
'lookup_table', 'sequence_pool', 'concat', 'mul', 'elementwise_add',
'cross_entropy', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad', 'elementwise_add_grad', 'send', 'mul_grad',
'cross_entropy2', 'mean', 'fill_constant', 'mean_grad',
'cross_entropy_grad2', 'elementwise_add_grad', 'send', 'mul_grad',
'send', 'concat_grad', 'sequence_pool_grad', 'lookup_table_grad',
'split_selected_rows', 'send', 'sequence_pool_grad',
'lookup_table_grad', 'sequence_pool_grad', 'lookup_table_grad',

Loading…
Cancel
Save