fix conflict, test=develop (#24238)
parent
c3c61d34c1
commit
950892044f
@ -0,0 +1,163 @@
|
|||||||
|
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#ifdef PADDLE_WITH_BOX_PS
|
||||||
|
#include <vector>
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
|
||||||
|
template <size_t EMBEDX_DIM, size_t EXPAND_EMBED_DIM>
|
||||||
|
void BoxWrapper::PullSparseCase(const paddle::platform::Place& place,
|
||||||
|
const std::vector<const uint64_t*>& keys,
|
||||||
|
const std::vector<float*>& values,
|
||||||
|
const std::vector<int64_t>& slot_lengths,
|
||||||
|
const int hidden_size,
|
||||||
|
const int expand_embed_dim) {
|
||||||
|
VLOG(3) << "Begin PullSparse";
|
||||||
|
platform::Timer all_timer;
|
||||||
|
platform::Timer pull_boxps_timer;
|
||||||
|
all_timer.Start();
|
||||||
|
|
||||||
|
int64_t total_length =
|
||||||
|
std::accumulate(slot_lengths.begin(), slot_lengths.end(), 0UL);
|
||||||
|
auto buf = memory::AllocShared(
|
||||||
|
place, total_length *
|
||||||
|
sizeof(boxps::FeatureValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>));
|
||||||
|
boxps::FeatureValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>* total_values_gpu =
|
||||||
|
reinterpret_cast<boxps::FeatureValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>*>(
|
||||||
|
buf->ptr());
|
||||||
|
|
||||||
|
if (platform::is_cpu_place(place)) {
|
||||||
|
PADDLE_THROW(platform::errors::Unimplemented(
|
||||||
|
"Warning:: CPUPlace is not supported in PaddleBox now."));
|
||||||
|
} else if (platform::is_gpu_place(place)) {
|
||||||
|
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
|
||||||
|
VLOG(3) << "Begin copy keys, key_num[" << total_length << "]";
|
||||||
|
int device_id = BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId();
|
||||||
|
LoDTensor& total_keys_tensor = keys_tensor[device_id];
|
||||||
|
uint64_t* total_keys = reinterpret_cast<uint64_t*>(
|
||||||
|
total_keys_tensor.mutable_data<int64_t>({total_length, 1}, place));
|
||||||
|
|
||||||
|
// construct slot_level lod info
|
||||||
|
auto slot_lengths_lod = slot_lengths;
|
||||||
|
for (size_t i = 1; i < slot_lengths_lod.size(); i++) {
|
||||||
|
slot_lengths_lod[i] += slot_lengths_lod[i - 1];
|
||||||
|
}
|
||||||
|
auto buf_key = memory::AllocShared(place, keys.size() * sizeof(uint64_t*));
|
||||||
|
auto buf_length =
|
||||||
|
memory::AllocShared(place, slot_lengths.size() * sizeof(int64_t));
|
||||||
|
uint64_t** gpu_keys = reinterpret_cast<uint64_t**>(buf_key->ptr());
|
||||||
|
int64_t* gpu_len = reinterpret_cast<int64_t*>(buf_length->ptr());
|
||||||
|
cudaMemcpy(gpu_keys, keys.data(), keys.size() * sizeof(uint64_t*),
|
||||||
|
cudaMemcpyHostToDevice);
|
||||||
|
cudaMemcpy(gpu_len, slot_lengths_lod.data(),
|
||||||
|
slot_lengths.size() * sizeof(int64_t), cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
|
this->CopyKeys(place, gpu_keys, total_keys, gpu_len,
|
||||||
|
static_cast<int>(slot_lengths.size()),
|
||||||
|
static_cast<int>(total_length));
|
||||||
|
VLOG(3) << "Begin call PullSparseGPU in BoxPS";
|
||||||
|
pull_boxps_timer.Start();
|
||||||
|
int ret = boxps_ptr_->PullSparseGPU(
|
||||||
|
total_keys, reinterpret_cast<void*>(total_values_gpu),
|
||||||
|
static_cast<int>(total_length), device_id);
|
||||||
|
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
|
||||||
|
"PullSparseGPU failed in BoxPS."));
|
||||||
|
pull_boxps_timer.Pause();
|
||||||
|
|
||||||
|
VLOG(3) << "Begin Copy result to tensor, total_length[" << total_length
|
||||||
|
<< "]";
|
||||||
|
this->CopyForPull(place, gpu_keys, values,
|
||||||
|
reinterpret_cast<void*>(total_values_gpu), gpu_len,
|
||||||
|
static_cast<int>(slot_lengths.size()), hidden_size,
|
||||||
|
expand_embed_dim, total_length);
|
||||||
|
#else
|
||||||
|
PADDLE_THROW(platform::errors::PreconditionNotMet(
|
||||||
|
"Please compile WITH_GPU option, because NCCL doesn't support "
|
||||||
|
"windows."));
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
PADDLE_THROW(platform::errors::PreconditionNotMet(
|
||||||
|
"PaddleBox: PullSparse Only Support CPUPlace or CUDAPlace Now."));
|
||||||
|
}
|
||||||
|
all_timer.Pause();
|
||||||
|
VLOG(1) << "PullSparse total costs: " << all_timer.ElapsedSec()
|
||||||
|
<< " s, of which BoxPS costs: " << pull_boxps_timer.ElapsedSec()
|
||||||
|
<< " s";
|
||||||
|
VLOG(3) << "End PullSparse";
|
||||||
|
}
|
||||||
|
|
||||||
|
template <size_t EMBEDX_DIM, size_t EXPAND_EMBED_DIM>
|
||||||
|
void BoxWrapper::PushSparseGradCase(
|
||||||
|
const paddle::platform::Place& place,
|
||||||
|
const std::vector<const uint64_t*>& keys,
|
||||||
|
const std::vector<const float*>& grad_values,
|
||||||
|
const std::vector<int64_t>& slot_lengths, const int hidden_size,
|
||||||
|
const int expand_embed_dim, const int batch_size) {
|
||||||
|
VLOG(3) << "Begin PushSparseGrad";
|
||||||
|
platform::Timer all_timer;
|
||||||
|
platform::Timer push_boxps_timer;
|
||||||
|
all_timer.Start();
|
||||||
|
int64_t total_length =
|
||||||
|
std::accumulate(slot_lengths.begin(), slot_lengths.end(), 0UL);
|
||||||
|
auto buf = memory::AllocShared(
|
||||||
|
place,
|
||||||
|
total_length *
|
||||||
|
sizeof(boxps::FeaturePushValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>));
|
||||||
|
boxps::FeaturePushValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>*
|
||||||
|
total_grad_values_gpu = reinterpret_cast<
|
||||||
|
boxps::FeaturePushValueGpu<EMBEDX_DIM, EXPAND_EMBED_DIM>*>(
|
||||||
|
buf->ptr());
|
||||||
|
if (platform::is_cpu_place(place)) {
|
||||||
|
PADDLE_THROW(platform::errors::Unimplemented(
|
||||||
|
"Warning:: CPUPlace is not supported in PaddleBox now."));
|
||||||
|
} else if (platform::is_gpu_place(place)) {
|
||||||
|
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
|
||||||
|
int device_id = BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId();
|
||||||
|
LoDTensor& cached_total_keys_tensor = keys_tensor[device_id];
|
||||||
|
uint64_t* total_keys =
|
||||||
|
reinterpret_cast<uint64_t*>(cached_total_keys_tensor.data<int64_t>());
|
||||||
|
VLOG(3) << "Begin copy grad tensor to boxps struct";
|
||||||
|
this->CopyForPush(place, grad_values, total_grad_values_gpu, slot_lengths,
|
||||||
|
hidden_size, expand_embed_dim, total_length, batch_size);
|
||||||
|
|
||||||
|
VLOG(3) << "Begin call PushSparseGPU in BoxPS";
|
||||||
|
push_boxps_timer.Start();
|
||||||
|
int ret = boxps_ptr_->PushSparseGPU(
|
||||||
|
total_keys, reinterpret_cast<void*>(total_grad_values_gpu),
|
||||||
|
static_cast<int>(total_length),
|
||||||
|
BOOST_GET_CONST(platform::CUDAPlace, place).GetDeviceId());
|
||||||
|
PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet(
|
||||||
|
"PushSparseGPU failed in BoxPS."));
|
||||||
|
push_boxps_timer.Pause();
|
||||||
|
#else
|
||||||
|
PADDLE_THROW(platform::errors::PreconditionNotMet(
|
||||||
|
"Please compile WITH_GPU option, because NCCL doesn't support "
|
||||||
|
"windows."));
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
PADDLE_THROW(platform::errors::PreconditionNotMet(
|
||||||
|
"PaddleBox: PushSparseGrad Only Support CPUPlace or CUDAPlace Now."));
|
||||||
|
}
|
||||||
|
all_timer.Pause();
|
||||||
|
VLOG(1) << "PushSparseGrad total cost: " << all_timer.ElapsedSec()
|
||||||
|
<< " s, of which BoxPS cost: " << push_boxps_timer.ElapsedSec()
|
||||||
|
<< " s";
|
||||||
|
VLOG(3) << "End PushSparseGrad";
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
||||||
|
#endif
|
@ -0,0 +1,157 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "paddle/fluid/operators/pull_box_extended_sparse_op.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
|
||||||
|
class PullBoxExtendedSparseOp : public framework::OperatorWithKernel {
|
||||||
|
public:
|
||||||
|
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||||
|
void InferShape(framework::InferShapeContext* ctx) const override {
|
||||||
|
PADDLE_ENFORCE_GE(
|
||||||
|
ctx->Inputs("Ids").size(), 1UL,
|
||||||
|
platform::errors::InvalidArgument(
|
||||||
|
"Inputs(Ids) of PullBoxExtendedSparseOp should not be empty."));
|
||||||
|
PADDLE_ENFORCE_GE(
|
||||||
|
ctx->Outputs("Out").size(), 1UL,
|
||||||
|
platform::errors::InvalidArgument(
|
||||||
|
"Outputs(Out) of PullBoxExtendedSparseOp should not be empty."));
|
||||||
|
PADDLE_ENFORCE_GE(ctx->Outputs("OutExtend").size(), 1UL,
|
||||||
|
platform::errors::InvalidArgument(
|
||||||
|
"Outputs(OutExtend) of PullBoxExtendedSparseOp "
|
||||||
|
"should not be empty."));
|
||||||
|
auto emb_size = static_cast<int64_t>(ctx->Attrs().Get<int>("emb_size"));
|
||||||
|
auto emb_extended_size =
|
||||||
|
static_cast<int64_t>(ctx->Attrs().Get<int>("emb_extended_size"));
|
||||||
|
auto all_ids_dim = ctx->GetInputsDim("Ids");
|
||||||
|
const size_t n_ids = all_ids_dim.size();
|
||||||
|
std::vector<framework::DDim> outs_dims;
|
||||||
|
std::vector<framework::DDim> outs_extended_dims;
|
||||||
|
outs_dims.resize(n_ids);
|
||||||
|
outs_extended_dims.resize(n_ids);
|
||||||
|
for (size_t i = 0; i < n_ids; ++i) {
|
||||||
|
const auto ids_dims = all_ids_dim[i];
|
||||||
|
int ids_rank = ids_dims.size();
|
||||||
|
PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1,
|
||||||
|
platform::errors::InvalidArgument(
|
||||||
|
"Shape error in %lu id, the last dimension of the "
|
||||||
|
"'Ids' tensor must be 1.",
|
||||||
|
i));
|
||||||
|
auto out_dim = framework::vectorize(
|
||||||
|
framework::slice_ddim(ids_dims, 0, ids_rank - 1));
|
||||||
|
out_dim.push_back(emb_size);
|
||||||
|
outs_dims[i] = framework::make_ddim(out_dim);
|
||||||
|
|
||||||
|
auto out_extended_dim = framework::vectorize(
|
||||||
|
framework::slice_ddim(ids_dims, 0, ids_rank - 1));
|
||||||
|
out_extended_dim.push_back(emb_extended_size);
|
||||||
|
outs_extended_dims[i] = framework::make_ddim(out_extended_dim);
|
||||||
|
}
|
||||||
|
ctx->SetOutputsDim("Out", outs_dims);
|
||||||
|
ctx->SetOutputsDim("OutExtend", outs_extended_dims);
|
||||||
|
for (size_t i = 0; i < n_ids; ++i) {
|
||||||
|
ctx->ShareLoD("Ids", "Out", i, i);
|
||||||
|
ctx->ShareLoD("Ids", "OutExtend", i, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
framework::OpKernelType GetExpectedKernelType(
|
||||||
|
const framework::ExecutionContext& ctx) const override {
|
||||||
|
return framework::OpKernelType(framework::proto::VarType::FP32,
|
||||||
|
ctx.device_context());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Declares the inputs, outputs, attributes and user-facing documentation of
// the pull_box_extended_sparse operator.
class PullBoxExtendedSparseOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // The kernels read the ids through data<int64_t>(), so int64 is the only
    // id type documented here.
    AddInput("Ids",
             "Input tensors with type int64 "
             "contains the ids to be looked up in BoxPS. "
             "The last dimension size must be 1.")
        .AsDuplicable();
    AddOutput("Out", "The lookup results tensors.").AsDuplicable();
    AddOutput("OutExtend", "The lookup extended results tensors.")
        .AsDuplicable();
    AddAttr<int>("emb_size", "(int) the embedding hidden size").SetDefault(1);
    AddAttr<int>("emb_extended_size",
                 "(int) the extended_embedding hidden size")
        .SetDefault(128);
    AddComment(R"DOC(
Pull Box Extended Sparse Operator.

This operator is used to perform lookups on the BoxPS,
then concatenated into a dense tensor.

The input Ids can carry the LoD (Level of Details) information,
or not. And the output only shares the LoD information with input Ids.

)DOC");
  }
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class PushBoxExtendedSparseOpMaker : public framework::SingleGradOpMaker<T> {
|
||||||
|
public:
|
||||||
|
using framework::SingleGradOpMaker<T>::SingleGradOpMaker;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void Apply(GradOpPtr<T> op) const override {
|
||||||
|
op->SetType("push_box_extended_sparse");
|
||||||
|
op->SetInput("Ids", this->Input("Ids"));
|
||||||
|
op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out"));
|
||||||
|
op->SetInput(framework::GradVarName("OutExtend"),
|
||||||
|
this->OutputGrad("OutExtend"));
|
||||||
|
op->SetOutput(framework::GradVarName("Out"), this->OutputGrad("Out"));
|
||||||
|
op->SetAttrMap(this->Attrs());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class PushBoxExtendedSparseOp : public framework::OperatorWithKernel {
|
||||||
|
public:
|
||||||
|
using framework::OperatorWithKernel::OperatorWithKernel;
|
||||||
|
|
||||||
|
void InferShape(framework::InferShapeContext* ctx) const override {}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
framework::OpKernelType GetExpectedKernelType(
|
||||||
|
const framework::ExecutionContext& ctx) const override {
|
||||||
|
return framework::OpKernelType(OperatorWithKernel::IndicateVarDataType(
|
||||||
|
ctx, framework::GradVarName("Out")),
|
||||||
|
ctx.device_context());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
namespace ops = paddle::operators;
|
||||||
|
REGISTER_OPERATOR(
|
||||||
|
pull_box_extended_sparse, ops::PullBoxExtendedSparseOp,
|
||||||
|
ops::PullBoxExtendedSparseOpMaker,
|
||||||
|
ops::PushBoxExtendedSparseOpMaker<paddle::framework::OpDesc>,
|
||||||
|
ops::PushBoxExtendedSparseOpMaker<paddle::imperative::OpBase>);
|
||||||
|
|
||||||
|
REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp);
|
||||||
|
|
||||||
|
REGISTER_OP_CPU_KERNEL(pull_box_extended_sparse,
|
||||||
|
ops::PullBoxExtendedSparseCPUKernel<float>,
|
||||||
|
ops::PullBoxExtendedSparseCPUKernel<double>);
|
||||||
|
|
||||||
|
REGISTER_OP_CPU_KERNEL(push_box_extended_sparse,
|
||||||
|
ops::PushBoxExtendedSparseCPUKernel<float>,
|
||||||
|
ops::PushBoxExtendedSparseCPUKernel<double>);
|
@ -0,0 +1,46 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "paddle/fluid/operators/pull_box_extended_sparse_op.h"
|
||||||
|
#include "paddle/fluid/platform/cuda_primitives.h"
|
||||||
|
#include "paddle/fluid/platform/gpu_info.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
|
||||||
|
public:
|
||||||
|
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||||
|
PullBoxExtendedSparseFunctor<T>(ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
|
||||||
|
public:
|
||||||
|
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||||
|
PushBoxExtendedSparseFunctor<T>(ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
namespace ops = paddle::operators;
|
||||||
|
REGISTER_OP_CUDA_KERNEL(pull_box_extended_sparse,
|
||||||
|
ops::PullBoxExtendedSparseCUDAKernel<float>,
|
||||||
|
ops::PullBoxExtendedSparseCUDAKernel<double>);
|
||||||
|
REGISTER_OP_CUDA_KERNEL(push_box_extended_sparse,
|
||||||
|
ops::PushBoxExtendedSparseCUDAKernel<float>,
|
||||||
|
ops::PushBoxExtendedSparseCUDAKernel<double>);
|
@ -0,0 +1,119 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
#include "paddle/fluid/framework/fleet/box_wrapper.h"
|
||||||
|
#include "paddle/fluid/framework/op_registry.h"
|
||||||
|
#include "paddle/fluid/framework/tensor.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace operators {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void PullBoxExtendedSparseFunctor(
|
||||||
|
const framework::ExecutionContext &ctx) {
|
||||||
|
auto inputs = ctx.MultiInput<framework::Tensor>("Ids");
|
||||||
|
auto outputs = ctx.MultiOutput<framework::Tensor>("Out");
|
||||||
|
auto outputs_extend = ctx.MultiOutput<framework::Tensor>("OutExtend");
|
||||||
|
const auto slot_size = inputs.size();
|
||||||
|
std::vector<const uint64_t *> all_keys(slot_size);
|
||||||
|
// BoxPS only supports float now
|
||||||
|
std::vector<float *> all_values(slot_size * 2);
|
||||||
|
std::vector<int64_t> slot_lengths(slot_size);
|
||||||
|
for (size_t i = 0; i < slot_size; i++) {
|
||||||
|
const auto *slot = inputs[i];
|
||||||
|
const uint64_t *single_slot_keys =
|
||||||
|
reinterpret_cast<const uint64_t *>(slot->data<int64_t>());
|
||||||
|
all_keys[i] = single_slot_keys;
|
||||||
|
slot_lengths[i] = slot->numel();
|
||||||
|
auto *output = outputs[i]->mutable_data<T>(ctx.GetPlace());
|
||||||
|
auto *output_extend = outputs_extend[i]->mutable_data<T>(ctx.GetPlace());
|
||||||
|
all_values[i] = reinterpret_cast<float *>(output);
|
||||||
|
all_values[i + slot_size] = reinterpret_cast<float *>(output_extend);
|
||||||
|
}
|
||||||
|
#ifdef PADDLE_WITH_BOX_PS
|
||||||
|
auto emb_size = ctx.Attr<int>("emb_size");
|
||||||
|
auto emb_extended_size = ctx.Attr<int>("emb_extended_size");
|
||||||
|
auto box_ptr = paddle::framework::BoxWrapper::GetInstance();
|
||||||
|
box_ptr->PullSparse(ctx.GetPlace(), all_keys, all_values, slot_lengths,
|
||||||
|
emb_size, emb_extended_size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void PushBoxExtendedSparseFunctor(
|
||||||
|
const framework::ExecutionContext &ctx) {
|
||||||
|
auto inputs = ctx.MultiInput<framework::LoDTensor>("Ids");
|
||||||
|
auto d_output =
|
||||||
|
ctx.MultiInput<framework::Tensor>(framework::GradVarName("Out"));
|
||||||
|
auto d_output_extend =
|
||||||
|
ctx.MultiInput<framework::Tensor>(framework::GradVarName("OutExtend"));
|
||||||
|
const auto slot_size = inputs.size();
|
||||||
|
std::vector<const uint64_t *> all_keys(slot_size);
|
||||||
|
std::vector<const float *> all_grad_values(slot_size * 2);
|
||||||
|
std::vector<int64_t> slot_lengths(slot_size);
|
||||||
|
int batch_size = -1;
|
||||||
|
for (size_t i = 0; i < slot_size; i++) {
|
||||||
|
const auto *slot = inputs[i];
|
||||||
|
const uint64_t *single_slot_keys =
|
||||||
|
reinterpret_cast<const uint64_t *>(slot->data<int64_t>());
|
||||||
|
all_keys[i] = single_slot_keys;
|
||||||
|
slot_lengths[i] = slot->numel();
|
||||||
|
int cur_batch_size =
|
||||||
|
slot->lod().size() ? slot->lod()[0].size() - 1 : slot->dims()[0];
|
||||||
|
if (batch_size == -1) {
|
||||||
|
batch_size = cur_batch_size;
|
||||||
|
} else {
|
||||||
|
PADDLE_ENFORCE_EQ(batch_size, cur_batch_size,
|
||||||
|
platform::errors::PreconditionNotMet(
|
||||||
|
"The batch size of all input slots should be same,"
|
||||||
|
"please cheack"));
|
||||||
|
}
|
||||||
|
const float *grad_value = d_output[i]->data<float>();
|
||||||
|
const float *grad_value_extend = d_output_extend[i]->data<float>();
|
||||||
|
all_grad_values[i] = reinterpret_cast<const float *>(grad_value);
|
||||||
|
all_grad_values[i + slot_size] =
|
||||||
|
reinterpret_cast<const float *>(grad_value_extend);
|
||||||
|
}
|
||||||
|
#ifdef PADDLE_WITH_BOX_PS
|
||||||
|
auto emb_size = ctx.Attr<int>("emb_size");
|
||||||
|
auto emb_extended_size = ctx.Attr<int>("emb_extended_size");
|
||||||
|
auto box_ptr = paddle::framework::BoxWrapper::GetInstance();
|
||||||
|
box_ptr->PushSparseGrad(ctx.GetPlace(), all_keys, all_grad_values,
|
||||||
|
slot_lengths, emb_size, emb_extended_size,
|
||||||
|
batch_size);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
using LoDTensor = framework::LoDTensor;
|
||||||
|
template <typename T>
|
||||||
|
class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
|
||||||
|
public:
|
||||||
|
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||||
|
PullBoxExtendedSparseFunctor<T>(ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class PushBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
|
||||||
|
public:
|
||||||
|
void Compute(const framework::ExecutionContext &ctx) const override {
|
||||||
|
PushBoxExtendedSparseFunctor<T>(ctx);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace operators
|
||||||
|
} // namespace paddle
|
Loading…
Reference in new issue