fix merge issue

fix_conll05_bug
typhoonzero 8 years ago
commit 11bcb43a44

@@ -2,7 +2,7 @@
|---|---|
| backyes | Yan-Fei Wang |
| beckett1124 | Bin Qi |
| Canpio | Jia-Yi Feng |
| JiayiFeng | Jia-Yi Feng |
| chengxiaohua1105 | Xiao-Hua Cheng |
| cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
| cxysteven | Xing-Yi Cheng |

@@ -82,7 +82,7 @@ language = 'zh_CN'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_en*', '*_en*']
exclude_patterns = ['_build', '**/*_en*', '*_en*', 'api/*']
# The reST default role (used for this markup: `text`) to use for all
# documents.

@@ -82,7 +82,7 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = ['_build', '**/*_cn*', '*_cn*']
exclude_patterns = ['_build', '**/*_cn*', '*_cn*', 'api/*']
# The reST default role (used for this markup: `text`) to use for all
# documents.

@@ -11,7 +11,6 @@ if(MOBILE_INFERENCE)
else()
add_subdirectory(pserver)
add_subdirectory(trainer)
add_subdirectory(string)
add_subdirectory(scripts)
if(WITH_C_API)

@@ -4,3 +4,4 @@ add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
add_subdirectory(inference)
add_subdirectory(string)

@@ -314,5 +314,15 @@ DDim stride(const DDim& ddim) {
}
return framework::make_ddim(strides);
}
DDim stride_numel(const framework::DDim& ddim) {
std::vector<int64_t> strides(ddim.size());
strides[ddim.size() - 1] = ddim[ddim.size() - 1];
for (int i = ddim.size() - 2; i >= 0; --i) {
strides[i] = strides[i + 1] * ddim[i];
}
return framework::make_ddim(strides);
}
} // namespace framework
} // namespace paddle
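
The new stride_numel helper returns suffix products of the dimensions rather than element strides. A minimal standalone sketch of the values it produces (stride_numel_sketch is an illustrative stand-in for framework::stride_numel, not part of this commit):
#include <cstdint>
#include <iostream>
#include <vector>

// Suffix products of the dims: for {4, 20, 100} this yields {8000, 2000, 100},
// whereas the element strides from stride() would be {2000, 100, 1}.
std::vector<int64_t> stride_numel_sketch(const std::vector<int64_t>& dims) {
  std::vector<int64_t> strides(dims.size());
  strides.back() = dims.back();
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
    strides[i] = strides[i + 1] * dims[i];
  }
  return strides;
}

int main() {
  for (int64_t s : stride_numel_sketch({4, 20, 100})) {
    std::cout << s << " ";  // prints: 8000 2000 100
  }
  return 0;
}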

@@ -125,6 +125,8 @@ DDim flatten_to_2d(const DDim& src, int num_col_dims);
DDim flatten_to_1d(const DDim& src);
DDim stride(const DDim& ddim);
DDim stride_numel(const DDim& ddim);
} // namespace framework
} // namespace paddle

@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/string/piece.h"
#include "paddle/fluid/string/piece.h"
namespace paddle {
namespace framework {

@@ -37,9 +37,8 @@ class Vector {
// Fill vector with value. The vector size is `count`.
explicit Vector(size_t count, const T& value = T()) {
if (count == 0) {
InitEmpty();
} else {
InitEmpty();
if (count != 0) {
resize(count);
T* ptr = begin();
for (size_t i = 0; i < count; ++i) {
@@ -122,6 +121,10 @@ class Vector {
const T* begin() const { return &this->operator[](0); }
const T* end() const { return &this->operator[](size()); }
const T* cbegin() const { return begin(); }
const T* cend() const { return end(); }
const T& back() const {
auto it = end();
--it;
@@ -244,7 +247,9 @@ class Vector {
bool operator==(const Vector<T>& other) const {
if (size() != other.size()) return false;
for (auto it1 = begin(), it2 = other.begin(); it1 < end(); ++it1, ++it2) {
auto it1 = cbegin();
auto it2 = other.cbegin();
for (; it1 < cend(); ++it1, ++it2) {
if (*it1 != *it2) {
return false;
}
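
The comparison above now walks both containers through const iterators. A minimal sketch of the same element-wise pattern, written against std::vector as a plain stand-in for framework::Vector<T> (only the iteration pattern is illustrated, not the CPU/GPU mirroring Vector does):
#include <cassert>
#include <vector>

// Element-wise equality using const iterators, mirroring operator== above.
template <typename T>
bool Equal(const std::vector<T>& a, const std::vector<T>& b) {
  if (a.size() != b.size()) return false;
  auto it1 = a.cbegin();
  auto it2 = b.cbegin();
  for (; it1 != a.cend(); ++it1, ++it2) {
    if (*it1 != *it2) return false;
  }
  return true;
}

int main() {
  std::vector<int> x{1, 2, 3};
  std::vector<int> y{1, 2, 3};
  std::vector<int> z{1, 2, 4};
  assert(Equal(x, y));
  assert(!Equal(x, z));
  return 0;
}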

@@ -26,10 +26,10 @@ TEST(mixed_vector, CPU_VECTOR) {
for (int i = 0; i < 10; ++i) {
tmp.push_back(i);
}
ASSERT_EQ(tmp.size(), 10);
ASSERT_EQ(tmp.size(), 10UL);
vec<int> tmp2;
tmp2 = tmp;
ASSERT_EQ(tmp2.size(), 10);
ASSERT_EQ(tmp2.size(), 10UL);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(tmp2[i], i);
ASSERT_EQ(tmp2[i], tmp[i]);
@@ -58,7 +58,7 @@ TEST(mixed_vector, GPU_VECTOR) {
for (int i = 0; i < 10; ++i) {
tmp.push_back(i);
}
ASSERT_EQ(tmp.size(), 10);
ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu(0);
multiply_10<<<1, 1, 0, GetCUDAStream(gpu)>>>(tmp.MutableData(gpu));
@@ -79,7 +79,7 @@ TEST(mixed_vector, MultiGPU) {
for (int i = 0; i < 10; ++i) {
tmp.push_back(i);
}
ASSERT_EQ(tmp.size(), 10);
ASSERT_EQ(tmp.size(), 10UL);
paddle::platform::CUDAPlace gpu0(0);
paddle::platform::SetDeviceId(0);
multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
@@ -91,3 +91,10 @@ TEST(mixed_vector, MultiGPU) {
ASSERT_EQ(tmp[i], i * 100);
}
}
TEST(mixed_vector, InitWithCount) {
paddle::framework::Vector<int> vec(10, 10);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(vec[i], 10);
}
}

@@ -18,7 +18,7 @@ limitations under the License. */
#include <mutex> // for call_once
#include "glog/logging.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/string/printf.h"
#include "paddle/fluid/string/printf.h"
DEFINE_bool(benchmark, false,
"Doing memory benchmark. It will make deleting scope synchronized, "

@@ -28,17 +28,18 @@ class ConcatKernel : public framework::OpKernel<T> {
auto ins = ctx.MultiInput<framework::Tensor>("X");
auto* out = ctx.Output<framework::Tensor>("Out");
int64_t axis = static_cast<int64_t>(ctx.Attr<int>("axis"));
const size_t n = ins.size();
auto place = ctx.GetPlace();
out->mutable_data<T>(place);
auto out_stride = framework::stride_numel(out->dims());
size_t output_offset = 0;
out->mutable_data<T>(ctx.GetPlace());
auto out_stride = framework::stride(out->dims());
for (size_t i = 0; i < n; i++) {
auto& in = ins[i];
auto axis_dim = in->dims()[axis];
auto in_stride = framework::stride(in->dims());
StridedMemcpy<T>(ctx.device_context(), in->data<T>(), in_stride,
in->dims(), out_stride, out->data<T>() + output_offset);
output_offset += axis_dim * in_stride[axis];
for (auto* in : ins) {
auto in_stride = framework::stride_numel(in->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis,
out->data<T>() + output_offset, out_stride,
in->data<T>(), in_stride);
output_offset += in_stride[axis];
}
}
};
@@ -50,17 +51,16 @@ class ConcatGradKernel : public framework::OpKernel<T> {
auto* in = ctx.Input<framework::Tensor>(framework::GradVarName("Out"));
auto outs = ctx.MultiOutput<framework::Tensor>(framework::GradVarName("X"));
int64_t axis = static_cast<int64_t>(ctx.Attr<int>("axis"));
const size_t n = outs.size();
size_t input_offset = 0;
auto in_stride = framework::stride(in->dims());
for (size_t i = 0; i < n; i++) {
auto& out = outs[i];
auto in_stride = framework::stride_numel(in->dims());
for (auto& out : outs) {
out->mutable_data<T>(ctx.GetPlace());
size_t axis_dim = out->dims()[axis];
auto out_stride = framework::stride(out->dims());
StridedMemcpy<T>(ctx.device_context(), in->data<T>() + input_offset,
in_stride, out->dims(), out_stride, out->data<T>());
input_offset += axis_dim * in_stride[axis];
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(),
out_stride, in->data<T>() + input_offset,
in_stride);
input_offset += out_stride[axis];
}
}
};
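
To make the layout behind StridedNumelCopyWithAxis concrete, here is a CPU-only sketch of how concatenation along an axis decomposes into `before` contiguous block copies (ConcatAxisCPU and StrideNumel are illustrative helpers, not Paddle APIs):
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Suffix products of the dims (the "stride numel" form used above).
std::vector<int64_t> StrideNumel(const std::vector<int64_t>& dims) {
  std::vector<int64_t> s(dims.size());
  s.back() = dims.back();
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i) {
    s[i] = s[i + 1] * dims[i];
  }
  return s;
}

// Copy one input into the output at dst_offset, axis-wise: `before` outer
// blocks, each a contiguous run of src_after elements.
void ConcatAxisCPU(const float* src, const std::vector<int64_t>& src_dims,
                   float* dst, const std::vector<int64_t>& dst_dims, int axis,
                   int64_t dst_offset) {
  auto src_sn = StrideNumel(src_dims);
  auto dst_sn = StrideNumel(dst_dims);
  int64_t before = dst_sn[0] / dst_sn[axis];
  int64_t src_after = src_sn[axis];
  int64_t dst_after = dst_sn[axis];
  for (int64_t i = 0; i < before; ++i) {
    std::memcpy(dst + dst_offset + i * dst_after, src + i * src_after,
                sizeof(float) * src_after);
  }
}

int main() {
  // Concatenate two [2, 3] tensors along axis 1 into a [2, 6] tensor.
  std::vector<float> a{0, 1, 2, 3, 4, 5};
  std::vector<float> b{10, 11, 12, 13, 14, 15};
  std::vector<float> out(12, 0.f);
  int64_t offset = 0;
  for (const std::vector<float>* src : {&a, &b}) {
    ConcatAxisCPU(src->data(), {2, 3}, out.data(), {2, 6}, 1, offset);
    offset += StrideNumel({2, 3})[1];  // advance by the input's axis extent
  }
  for (float v : out) std::cout << v << " ";  // 0 1 2 10 11 12 3 4 5 13 14 15
  return 0;
}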

@@ -27,7 +27,7 @@ limitations under the License. */
#include "paddle/fluid/operators/detail/grpc_server.h"
#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
#include "paddle/fluid/operators/detail/simple_block_queue.h"
#include "paddle/string/printf.h"
#include "paddle/fluid/string/printf.h"
namespace paddle {
namespace operators {
@@ -98,6 +98,7 @@ class ListenAndServOp : public framework::OperatorBase {
// the gradients arrive, just add suffix 0~n and merge the gradient.
rpc_service_->SetCond(0);
size_t recv_var_cnt = 0;
size_t update_param_cnt = 0;
int batch_barrier = 0;
while (batch_barrier != fan_in) {
const detail::MessageWithName &v = rpc_service_->Get();
@@ -122,11 +123,10 @@ class ListenAndServOp : public framework::OperatorBase {
}
}
VLOG(3) << "recv " << recv_var_cnt << " parmeters for one barrier.";
// TODO(Yancey1989): merge SelectedRows variables here
if (exit_flag) {
rpc_service_->ShutDown();
}
VLOG(3) << "run optimize graph...";
try {
executor.Run(*program, &recv_scope, block->ID(), /*global_block*/
false /*create_local_scope*/, false /*create_vars*/);
@@ -134,7 +134,7 @@ class ListenAndServOp : public framework::OperatorBase {
LOG(ERROR) << "run sub program error " << e.what();
}
rpc_service_->SetCond(1);
rpc_service_->WaitClientGet(recv_var_cnt);
rpc_service_->WaitClientGet(update_param_cnt);
grads_counter_.clear();
} // while(true)
}

@@ -22,7 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/string/printf.h"
#include "paddle/fluid/string/printf.h"
USE_NO_KERNEL_OP(send);
USE_NO_KERNEL_OP(listen_and_serv);

@@ -29,7 +29,9 @@ class SequenceExpandOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE(ctx->HasOutput("Out"));
PADDLE_ENFORCE(ctx->HasInput("Y"));
framework::DDim out_dim;
out_dim = ctx->GetInputDim("Y");
auto y_dim = ctx->GetInputDim("Y");
out_dim = ctx->GetInputDim("X");
out_dim[0] = y_dim[0];
ctx->ShareLoD("Y", "Out");
ctx->SetOutputDim("Out", out_dim);
}
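
A minimal sketch of the shape rule the corrected InferShape applies: the output keeps X's trailing dimensions and takes its leading dimension from Y (SequenceExpandOutDims is a hypothetical helper used only for illustration):
#include <cassert>
#include <cstdint>
#include <vector>

// Output dims = X dims with dims[0] replaced by Y's dims[0].
std::vector<int64_t> SequenceExpandOutDims(std::vector<int64_t> x_dims,
                                           const std::vector<int64_t>& y_dims) {
  x_dims[0] = y_dims[0];
  return x_dims;
}

int main() {
  auto out = SequenceExpandOutDims({3, 16}, {8, 1});
  assert((out == std::vector<int64_t>{8, 16}));
  return 0;
}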

@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include <chrono>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/strided_memcpy.h"
@@ -27,18 +28,18 @@ class SplitOpKernel : public framework::OpKernel<T> {
void Compute(const framework::ExecutionContext& ctx) const override {
auto* in = ctx.Input<framework::Tensor>("X");
auto outs = ctx.MultiOutput<framework::Tensor>("Out");
auto in_stride = framework::stride(in->dims());
auto in_stride = framework::stride_numel(in->dims());
int64_t axis = static_cast<int64_t>(ctx.Attr<int>("axis"));
const size_t n = outs.size();
auto place = ctx.GetPlace();
size_t input_offset = 0;
for (size_t i = 0; i < n; i++) {
auto& out = outs[i];
for (auto& out : outs) {
out->mutable_data<T>(ctx.GetPlace());
size_t axis_dim = out->dims()[axis];
auto out_stride = framework::stride(out->dims());
StridedMemcpy<T>(ctx.device_context(), in->data<T>() + input_offset,
in_stride, out->dims(), out_stride, out->data<T>());
input_offset += axis_dim * in_stride[axis];
auto out_stride = framework::stride_numel(out->dims());
StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(),
out_stride, in->data<T>() + input_offset,
in_stride);
input_offset += out_stride[axis];
}
}
};
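
The split kernel runs the same copy in the opposite direction: each output pulls `before` contiguous runs out of the shared input at a running offset. A short CPU-only sketch (SplitAxisCPU is an illustrative helper, not a Paddle API; the stride-numel values along the axis are passed in precomputed):
#include <cstdint>
#include <cstring>

// Copy one output slice out of the shared input, mirroring the concat sketch:
// `before` outer blocks, each a contiguous run of dst_after elements, read
// from the input at a running input_offset.
void SplitAxisCPU(const float* src, int64_t src_after, float* dst,
                  int64_t dst_after, int64_t before, int64_t input_offset) {
  for (int64_t i = 0; i < before; ++i) {
    std::memcpy(dst + i * dst_after, src + input_offset + i * src_after,
                sizeof(float) * dst_after);
  }
}

int main() {
  // Split a [2, 6] tensor along axis 1 into two [2, 3] tensors.
  float in[12] = {0, 1, 2, 10, 11, 12, 3, 4, 5, 13, 14, 15};
  float out0[6], out1[6];
  // Stride-numel along axis 1: 6 for the input, 3 for each output.
  SplitAxisCPU(in, /*src_after=*/6, out0, /*dst_after=*/3, /*before=*/2,
               /*input_offset=*/0);
  SplitAxisCPU(in, 6, out1, 3, 2, /*input_offset=*/3);
  // out0 is now {0, 1, 2, 3, 4, 5}; out1 is {10, 11, 12, 13, 14, 15}.
  return 0;
}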

@@ -41,5 +41,62 @@ inline void StridedMemcpy(const platform::DeviceContext& dev_ctx, const T* src,
StridedCopyDimVisitor<T> func(dev_ctx, src, src_stride, dst_stride, dst);
boost::apply_visitor(func, dst_dim);
}
// Strided numel memory copy from src to dst by the specified axis
//
// For example, for a tensor dims [4, 20, 100], the strided numel is
// [8000, 2000, 100]
//
// NOTE: The src and dst tensor should have the same elements
// except the specified axis.
template <typename T>
inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
int64_t axis, T* dst,
const framework::DDim& dst_stride_numel,
const T* src,
const framework::DDim& src_stride_numel) {
int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
int64_t src_after = src_stride_numel[axis];
int64_t dst_after = dst_stride_numel[axis];
auto place = ctx.GetPlace();
PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(),
"src and dst tensor should have the same dims size.");
for (int64_t i = 0; i < axis; ++i) {
if (i < axis) {
PADDLE_ENFORCE_EQ(src_stride_numel[i] / src_stride_numel[axis],
dst_stride_numel[i] / dst_stride_numel[axis],
"src and dst should have the same elements "
"except the specified axis.");
} else if (i == axis) {
continue;
} else {
PADDLE_ENFORCE_EQ(src_stride_numel[i], dst_stride_numel[i],
"src and dst should have the same elements "
"except the specified axis.");
}
}
for (int64_t i = 0; i < before; ++i) {
if (platform::is_cpu_place(place)) {
auto& cpu_place = boost::get<platform::CPUPlace>(place);
memory::Copy(cpu_place, dst + i * dst_after, cpu_place,
src + i * src_after, sizeof(T) * src_after);
} else {
#ifdef PADDLE_WITH_CUDA
auto& gpu_place = boost::get<platform::CUDAPlace>(place);
auto& cuda_ctx =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx);
memory::Copy(gpu_place, dst + i * dst_after, gpu_place,
src + i * src_after, sizeof(T) * src_after,
cuda_ctx.stream());
#else
PADDLE_THROW("Paddle is not compiled with GPU");
#endif
}
}
}
} // namespace operators
} // namespace paddle

File diff suppressed because it is too large.

@@ -17,39 +17,41 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename WT>
__global__ void NegTargetAssignKernel(const int* neg_indices, const size_t* lod,
const int num, const int num_prior_box,
const int background_label,
int* out_label, T* out_label_wt) {
const int N, const int M, const int K,
const int mismatch_value, T* out,
WT* out_wt) {
int bidx = blockIdx.x;
int st = lod[bidx];
int ed = lod[bidx + 1];
int row_start = bidx * num_prior_box;
int row_start = bidx * M;
for (int i = st + threadIdx.x; i < ed; i += blockDim.x) {
int id = row_start + neg_indices[i];
out_label[id] = background_label;
out_label_wt[id] = 1.;
for (int k = 0; k < K; ++k) {
out[id * K + k] = T(mismatch_value);
out_wt[id * K + k] = WT(1.);
}
}
}
template <typename T>
struct NegTargetAssignFunctor<platform::CUDADeviceContext, T> {
template <typename T, typename WT>
struct NegTargetAssignFunctor<platform::CUDADeviceContext, T, WT> {
void operator()(const platform::CUDADeviceContext& ctx,
const int* neg_indices, const size_t* lod, const int num,
const int num_prior_box, const int background_label,
int* out_label, T* out_label_wt) {
const int* neg_indices, const size_t* lod, const int N,
const int M, const int K, const int mismatch_value, T* out,
WT* out_wt) {
const int block_size = 256;
const int grid_size = num;
NegTargetAssignKernel<T><<<grid_size, block_size, 0, ctx.stream()>>>(
neg_indices, lod, num, num_prior_box, background_label, out_label,
out_label_wt);
const int grid_size = N;
NegTargetAssignKernel<T, WT><<<grid_size, block_size, 0, ctx.stream()>>>(
neg_indices, lod, N, M, K, mismatch_value, out, out_wt);
}
};
template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float>;
template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
template struct NegTargetAssignFunctor<platform::CUDADeviceContext, int, float>;
template struct NegTargetAssignFunctor<platform::CUDADeviceContext, float,
float>;
} // namespace operators
} // namespace paddle
@@ -57,5 +59,5 @@ template struct NegTargetAssignFunctor<platform::CUDADeviceContext, double>;
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
target_assign,
ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float>,
ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, double>);
ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, int, float>,
ops::TargetAssignKernel<paddle::platform::CUDADeviceContext, float, float>);

@@ -19,140 +19,113 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename WT>
struct TargetAssignFunctor {
const T* gt_box_;
const int* gt_label_;
const T* in_;
const int* match_indices_;
const size_t* lod_;
const int background_label_;
const int64_t num_;
const int64_t num_prior_box_;
T* out_box_;
T* out_box_wt_;
int* out_label_;
T* out_label_wt_;
TargetAssignFunctor(const T* gt_box, const int* gt_label,
const int* match_indices, const size_t* lod,
const int background_label, const int64_t num,
const int64_t np, T* out_box, T* out_box_wt,
int* out_label, T* out_label_wt)
: gt_box_(gt_box),
gt_label_(gt_label),
const int mismatch_value_;
const int64_t N_;
const int64_t M_;
const int64_t P_;
const int64_t K_;
T* out_;
WT* out_wt_;
TargetAssignFunctor(const T* input, const int* match_indices,
const size_t* lod, const int mismatch_value,
const int64_t N, const int64_t M, const int64_t P,
const int64_t K, T* out, WT* out_wt)
: in_(input),
match_indices_(match_indices),
lod_(lod),
background_label_(background_label),
num_(num),
num_prior_box_(np),
out_box_(out_box),
out_box_wt_(out_box_wt),
out_label_(out_label),
out_label_wt_(out_label_wt) {}
mismatch_value_(mismatch_value),
N_(N),
M_(M),
P_(P),
K_(K),
out_(out),
out_wt_(out_wt) {}
HOSTDEVICE void operator()(size_t i) const {
int row = i / num_prior_box_;
int col = i - row * num_prior_box_;
int h = i / M_;
int w = i - h * M_;
size_t row_off = lod_[row];
int offset = row * num_prior_box_ + col;
size_t off = lod_[h];
int id = match_indices_[i];
int id = match_indices_[offset];
T* obox = out_box_ + offset * 4;
int* olabel = out_label_ + offset;
T* obox_wt = out_box_wt_ + offset;
T* olabel_wt = out_label_wt_ + offset;
T* out = out_ + i * K_;
WT* out_wt = out_wt_ + i;
if (id > -1) {
const T* gtbox = gt_box_ + ((row_off + id) * num_prior_box_ + col) * 4;
obox[0] = gtbox[0];
obox[1] = gtbox[1];
obox[2] = gtbox[2];
obox[3] = gtbox[3];
olabel[0] = gt_label_[row_off + id];
obox_wt[0] = static_cast<T>(1.);
olabel_wt[0] = static_cast<T>(1.);
int w_off = w % P_;
const T* in = in_ + ((off + id) * P_ + w_off) * K_;
for (int64_t k = 0; k < K_; ++k) {
out[k] = in[k];
}
out_wt[0] = static_cast<WT>(1.);
} else {
obox[0] = static_cast<T>(0.);
obox[1] = static_cast<T>(0.);
obox[2] = static_cast<T>(0.);
obox[3] = static_cast<T>(0.);
olabel[0] = background_label_;
obox_wt[0] = static_cast<T>(0.);
olabel_wt[0] = static_cast<T>(0.);
for (int64_t k = 0; k < K_; ++k) {
out[k] = static_cast<T>(mismatch_value_);
}
out_wt[0] = static_cast<WT>(0.);
}
}
};
template <typename DeviceContext, typename T>
template <typename DeviceContext, typename T, typename WT>
struct NegTargetAssignFunctor {
void operator()(const platform::DeviceContext& ctx, const int* neg_indices,
const size_t* lod, const int num, const int num_prior_box,
const int background_label, int* out_label,
T* out_label_wt) const;
const size_t* lod, const int N, const int M, const int K,
const int mismatch_value, T* out, WT* out_wt) const;
};
template <typename DeviceContext, typename T>
template <typename DeviceContext, typename T, typename WT>
class TargetAssignKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* enc_gt_box = ctx.Input<framework::LoDTensor>("EncodedGTBBox");
auto* gt_label = ctx.Input<framework::LoDTensor>("GTScoreLabel");
auto* x = ctx.Input<framework::LoDTensor>("X");
auto* match_indices = ctx.Input<framework::Tensor>("MatchIndices");
auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");
auto* out_box = ctx.Output<framework::Tensor>("PredBBoxLabel");
auto* out_box_wt = ctx.Output<framework::Tensor>("PredBBoxWeight");
auto* out_label = ctx.Output<framework::Tensor>("PredScoreLabel");
auto* out_label_wt = ctx.Output<framework::Tensor>("PredScoreWeight");
PADDLE_ENFORCE_EQ(enc_gt_box->lod().size(), 1UL);
PADDLE_ENFORCE_EQ(gt_label->lod().size(), 1UL);
PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
auto* out = ctx.Output<framework::Tensor>("Out");
auto* out_wt = ctx.Output<framework::Tensor>("OutWeight");
int background_label = ctx.Attr<int>("background_label");
PADDLE_ENFORCE_EQ(x->lod().size(), 1UL);
int mismatch_value = ctx.Attr<int>("mismatch_value");
const T* box_data = enc_gt_box->data<T>();
const int* label_data = gt_label->data<int>();
const T* x_data = x->data<T>();
const int* match_idx_data = match_indices->data<int>();
const int* neg_idx_data = neg_indices->data<int>();
T* obox_data = out_box->mutable_data<T>(ctx.GetPlace());
T* obox_wt_data = out_box_wt->mutable_data<T>(ctx.GetPlace());
int* olabel_data = out_label->mutable_data<int>(ctx.GetPlace());
T* olabel_wt_data = out_label_wt->mutable_data<T>(ctx.GetPlace());
T* out_data = out->mutable_data<T>(ctx.GetPlace());
WT* out_wt_data = out_wt->mutable_data<WT>(ctx.GetPlace());
int64_t num = match_indices->dims()[0];
int64_t num_prior_box = match_indices->dims()[1];
int64_t n = match_indices->dims()[0];
int64_t m = match_indices->dims()[1];
int64_t p = x->dims()[1];
int64_t k = x->dims()[2];
auto gt_lod = enc_gt_box->lod().back();
auto gt_label_lod = gt_label->lod().back();
auto neg_lod = neg_indices->lod().back();
for (size_t i = 0; i < gt_lod.size(); ++i) {
PADDLE_ENFORCE_EQ(gt_lod.data()[i], gt_label_lod.data()[i]);
}
size_t* gt_lod_data = gt_lod.MutableData(ctx.GetPlace());
size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
auto x_lod = x->lod().back();
size_t* x_lod_data = x_lod.MutableData(ctx.GetPlace());
TargetAssignFunctor<T> functor(box_data, label_data, match_idx_data,
gt_lod_data, background_label, num,
num_prior_box, obox_data, obox_wt_data,
olabel_data, olabel_wt_data);
TargetAssignFunctor<T, WT> functor(x_data, match_idx_data, x_lod_data,
mismatch_value, n, m, p, k, out_data,
out_wt_data);
auto& device_ctx = ctx.template device_context<DeviceContext>();
platform::ForRange<DeviceContext> for_range(device_ctx,
num * num_prior_box);
platform::ForRange<DeviceContext> for_range(device_ctx, n * m);
for_range(functor);
NegTargetAssignFunctor<DeviceContext, T> neg_trg_functor;
neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, num, num_prior_box,
background_label, olabel_data, olabel_wt_data);
auto* neg_indices = ctx.Input<framework::LoDTensor>("NegIndices");
if (neg_indices) {
PADDLE_ENFORCE_EQ(neg_indices->lod().size(), 1UL);
const int* neg_idx_data = neg_indices->data<int>();
auto neg_lod = neg_indices->lod().back();
size_t* neg_lod_data = neg_lod.MutableData(ctx.GetPlace());
NegTargetAssignFunctor<DeviceContext, T, WT> neg_trg_functor;
neg_trg_functor(device_ctx, neg_idx_data, neg_lod_data, n, m, k,
mismatch_value, out_data, out_wt_data);
}
}
};
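
For reference, a CPU-only sketch of the generalized assignment the refactored functor performs over the flattened N x M grid (TargetAssignCPU and the concrete float types are illustrative; the operator itself is templated on T and WT):
#include <cstddef>
#include <cstdint>

// For each cell (h, w) of the N x M match matrix: if match_indices points at
// a source row, copy K values from in[((lod[h] + id) * P + w % P) * K] and set
// the weight to 1; otherwise fill with mismatch_value and set the weight to 0.
void TargetAssignCPU(const float* in, const int* match_indices,
                     const size_t* lod, int mismatch_value, int64_t N,
                     int64_t M, int64_t P, int64_t K, float* out,
                     float* out_wt) {
  for (int64_t i = 0; i < N * M; ++i) {
    int64_t h = i / M;      // row in the match matrix
    int64_t w = i - h * M;  // column in the match matrix
    int id = match_indices[i];
    float* o = out + i * K;
    if (id > -1) {
      const float* src = in + ((lod[h] + id) * P + w % P) * K;
      for (int64_t k = 0; k < K; ++k) o[k] = src[k];
      out_wt[i] = 1.f;
    } else {
      for (int64_t k = 0; k < K; ++k) o[k] = static_cast<float>(mismatch_value);
      out_wt[i] = 0.f;
    }
  }
}

int main() {
  // One sequence (lod = {0, 2}), a 1 x 3 match matrix, P = 3, K = 2.
  float in[2 * 3 * 2] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  int match[3] = {0, -1, 1};
  size_t lod[2] = {0, 2};
  float out[3 * 2], out_wt[3];
  TargetAssignCPU(in, match, lod, /*mismatch_value=*/0, /*N=*/1, /*M=*/3,
                  /*P=*/3, /*K=*/2, out, out_wt);
  // out is now {1, 2, 0, 0, 11, 12}; out_wt is {1, 0, 1}.
  return 0;
}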

@@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/string/printf.h"
#include "paddle/fluid/string/printf.h"
#include <ostream>
#include <sstream>

@@ -23,8 +23,8 @@ limitations under the License. */
#include <string>
#include "paddle/fluid/platform/macros.h"
#include "paddle/string/printf.h"
#include "paddle/string/to_string.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/to_string.h"
#ifdef __GNUC__
#include <cxxabi.h> // for __cxa_demangle

@@ -15,7 +15,7 @@ limitations under the License. */
#include "gtest/gtest.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/string/piece.h"
#include "paddle/fluid/string/piece.h"
using StringPiece = paddle::string::Piece;
using paddle::string::HasPrefix;

@@ -35,7 +35,7 @@ limitations under the License. */
#include "paddle/fluid/pybind/exception.h"
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/pybind/tensor_py.h"
#include "paddle/string/to_string.h"
#include "paddle/fluid/string/to_string.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"

Some files were not shown because too many files have changed in this diff.
