Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_var_name_in_opt_2
commit 7dc4a7f4f8
paddle/fluid/framework/details/data_balance_op_handle.cc
@@ -1,154 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include <algorithm>
#include "paddle/fluid/framework/details/container_cast.h"

namespace paddle {
namespace framework {
namespace details {

#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
DataBalanceOpHandle::DataBalanceOpHandle(
    ir::Node *node, const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places,
    const platform::NCCLContextMap *ctxs)
    : OpHandleBase(node), local_scopes_(local_scopes), places_(places) {
  if (ctxs) {
    for (auto &p : places_) {
      this->SetDeviceContext(p, ctxs->DevCtx(p));
    }
  }
}
#else
DataBalanceOpHandle::DataBalanceOpHandle(
    ir::Node *node, const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places)
    : OpHandleBase(node), local_scopes_(local_scopes), places_(places) {}
#endif

std::string DataBalanceOpHandle::Name() const { return "data balance"; }

std::vector<std::array<int, 3>> DataBalanceOpHandle::GetBalancePlan(
    const std::vector<int> &device_sizes) {
  int device_num = device_sizes.size();
  int total_size = 0;
  int empty_num = 0;
  std::vector<std::array<int, 2>> size_device_vec;
  size_device_vec.reserve(device_num);
  for (int i = 0; i < device_num; ++i) {
    if (device_sizes[i] == 0) {
      ++empty_num;
    }
    total_size += device_sizes[i];
    size_device_vec.push_back({{device_sizes[i], i}});
  }
  std::vector<std::array<int, 3>> res;
  if (empty_num == 0) {
    // No need to do data balance.
    return res;
  }
  if (total_size < device_num) {
    // Not enough data.
    PADDLE_THROW_EOF();
  }
  std::sort(size_device_vec.begin(), size_device_vec.end(),
            [](const std::array<int, 2> &a, const std::array<int, 2> &b) {
              return a[0] > b[0];
            });
  int expected_device_size = total_size / device_num;
  int src_idx = 0;
  for (int dst_idx = device_num - empty_num; dst_idx < device_num; ++dst_idx) {
    if (size_device_vec[src_idx][0] <= expected_device_size) {
      ++src_idx;
      PADDLE_ENFORCE_LT(
          src_idx, device_num - empty_num,
          "In the current strategy, an empty tensor should not be a copy "
          "source.");
    }
    size_device_vec[src_idx][0] -= expected_device_size;
    size_device_vec[dst_idx][0] += expected_device_size;
    res.push_back({{size_device_vec[src_idx][1], size_device_vec[dst_idx][1],
                    expected_device_size}});
  }
  return res;
}

void DataBalanceOpHandle::RunImpl() {
  PADDLE_ENFORCE_GT(places_.size(), 1UL,
                    "Data balance can only be enabled when the number of "
                    "places to run is larger than 1.");
  auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
  auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
  PADDLE_ENFORCE(in_var_handles.size() % places_.size() == 0);
  PADDLE_ENFORCE_EQ(
      in_var_handles.size(), out_var_handles.size(),
      "The NoDummyInputSize and NoDummyOutputSize should be equal.");
  int data_num = in_var_handles.size() / places_.size();
  WaitInputVarGenerated();
  std::vector<std::vector<LoDTensor *>> lod_tensors(data_num);
  std::vector<int> device_sizes;
  for (int i = 0; i < static_cast<int>(in_var_handles.size()); ++i) {
    PADDLE_ENFORCE_EQ(in_var_handles[i]->name(), out_var_handles[i]->name(),
                      "The name of input and output should be equal.");
    int place_idx = i / data_num;
    int data_idx = i % data_num;
    auto *local_scope =
        local_scopes_[place_idx]->FindVar(kLocalExecScopeName)->Get<Scope *>();
    auto *tensor_var = local_scope->FindVar(in_var_handles[i]->name());
    PADDLE_ENFORCE(tensor_var->IsType<LoDTensor>());
    auto *tensor = tensor_var->GetMutable<LoDTensor>();
    lod_tensors[data_idx].push_back(tensor);
    int ins_size =
        tensor->lod().empty() ? tensor->dims()[0] : tensor->NumElements();
    if (data_idx == 0) {
      device_sizes.emplace_back(ins_size);
    } else {
      PADDLE_ENFORCE_EQ(
          ins_size, device_sizes.at(place_idx),
          "All data on the same device shall have the same batch size.");
    }
  }
  const auto &balance_plan = GetBalancePlan(device_sizes);

  for (const auto &trans : balance_plan) {
    for (int data_idx = 0; data_idx < data_num; ++data_idx) {
      LoDTensor *src_tensor = lod_tensors[data_idx][trans[0]];
      LoDTensor *dst_tensor = lod_tensors[data_idx][trans[1]];
      int trans_ins_size = trans[2];
      LoD src_lod = src_tensor->lod();
      int src_ins_size =
          src_lod.empty() ? src_tensor->dims()[0] : src_tensor->NumElements();
      int cut_point = src_ins_size - trans_ins_size;
      if (!src_lod.empty()) {
        for (auto &level : src_lod) {
          cut_point = level[cut_point];
        }
      }
      TensorCopySync(src_tensor->Slice(cut_point, src_tensor->dims()[0]),
                     dst_tensor->place(), dst_tensor);
      src_tensor->ShareDataWith(src_tensor->Slice(0, cut_point));
      if (!src_lod.empty()) {
        dst_tensor->set_lod(SliceInLevel(
            src_lod, 0, src_ins_size - trans_ins_size, src_ins_size));
        src_tensor->set_lod(
            SliceInLevel(src_lod, 0, 0, src_ins_size - trans_ins_size));
      }
    }
  }
}

}  // namespace details
}  // namespace framework
}  // namespace paddle
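For reference, the plan produced by GetBalancePlan can be exercised outside Paddle. The sketch below (hypothetical, not part of this commit) re-implements the same logic as standalone C++, so the (src_dev_id, dst_dev_id, trans_size) triples can be printed for a sample input; with device sizes {4, 3, 0} it moves total/num = 2 instances from device 0 to the empty device 2.

#include <algorithm>
#include <array>
#include <cstdio>
#include <vector>

// Standalone re-implementation of the balance-plan logic above.
std::vector<std::array<int, 3>> BalancePlan(std::vector<int> sizes) {
  int n = static_cast<int>(sizes.size()), total = 0, empty = 0;
  std::vector<std::array<int, 2>> sd;  // (size, device) pairs
  for (int i = 0; i < n; ++i) {
    if (sizes[i] == 0) ++empty;
    total += sizes[i];
    sd.push_back({{sizes[i], i}});
  }
  std::vector<std::array<int, 3>> plan;
  // Nothing to do; note the real handle throws EOF when total < n.
  if (empty == 0 || total < n) return plan;
  std::sort(sd.begin(), sd.end(),
            [](const std::array<int, 2> &a, const std::array<int, 2> &b) {
              return a[0] > b[0];
            });
  int expected = total / n, src = 0;
  for (int dst = n - empty; dst < n; ++dst) {
    if (sd[src][0] <= expected) ++src;  // current donor is drained
    sd[src][0] -= expected;
    sd[dst][0] += expected;
    plan.push_back({{sd[src][1], sd[dst][1], expected}});
  }
  return plan;
}

int main() {
  for (const auto &t : BalancePlan({4, 3, 0}))
    std::printf("%d -> %d : %d\n", t[0], t[1], t[2]);  // prints "0 -> 2 : 2"
}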
paddle/fluid/framework/details/data_balance_op_handle.h
@@ -1,59 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <array>  // added: std::array is used below
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h"
#endif

namespace paddle {
namespace framework {
namespace details {

struct DataBalanceOpHandle : public OpHandleBase {
 public:
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                      const std::vector<platform::Place> &places,
                      const platform::NCCLContextMap *ctxs);
#else
  DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                      const std::vector<platform::Place> &places);
#endif

  std::string Name() const override;

  bool IsMultiDeviceTransfer() override { return false; }

 protected:
  void RunImpl() override;

 private:
  // std::vector<(src_dev_id, dst_dev_id, trans_size)>
  std::vector<std::array<int, 3>> GetBalancePlan(
      const std::vector<int> &batch_size_per_device);

  const std::vector<Scope *> local_scopes_;
  const std::vector<platform::Place> places_;
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/details/fuse_vars_op_handle.cc
@@ -1,51 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fuse_vars_op_handle.h"

namespace paddle {
namespace framework {
namespace details {

void FuseVarsOpHandle::RunImpl() {
  WaitInputVarGenerated(place_);

  auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
  auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
  PADDLE_ENFORCE_EQ(in_var_handles.size(), 0UL);
  PADDLE_ENFORCE_EQ(out_var_handles.size() - 1, inputs_numel_.size(), "");

  auto scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();

  auto out_var_handle = out_var_handles[0];
  auto out_var = scope->Var(out_var_handle->name());

  auto out_tensor = out_var->GetMutable<LoDTensor>();
  out_tensor->Resize({total_numel_}).mutable_data(this->place_, type_);

  int64_t s = 0;
  for (size_t i = 1; i < out_var_handles.size(); ++i) {
    auto out_name = out_var_handles[i]->name();
    auto out_t = scope->Var(out_name)->GetMutable<LoDTensor>();
    auto numel = this->inputs_numel_.at(out_name);
    out_t->ShareDataWith(out_tensor->Slice(s, s + numel));
    s += numel;
  }
  this->RunAndRecordEvent([] {});
}

std::string FuseVarsOpHandle::Name() const { return "fuse vars"; }
}  // namespace details
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/details/fuse_vars_op_handle.h
@@ -1,65 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <map>
#include <string>
#include <unordered_map>  // added: std::unordered_map is used below
#include <vector>

#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {
namespace details {

struct FuseVarsOpHandle : public OpHandleBase {
 public:
  FuseVarsOpHandle(ir::Node *node, Scope *local_scope,
                   const platform::Place &place,
                   const std::unordered_map<std::string, int64_t> &inputs_numel,
                   const proto::VarType::Type var_type)
      : OpHandleBase(node),
        local_scope_(local_scope),
        place_(place),
        inputs_numel_(inputs_numel),
        type_(var_type) {
    total_numel_ = 0;
    for (auto in_numel : inputs_numel) {
      PADDLE_ENFORCE_GT(in_numel.second, 0);
      total_numel_ += in_numel.second;
    }
  }

  std::string Name() const override;

  bool IsMultiDeviceTransfer() override { return false; }

 protected:
  void RunImpl() override;

 private:
  Scope *local_scope_;
  const platform::Place place_;
  const std::unordered_map<std::string, int64_t> inputs_numel_;
  const proto::VarType::Type type_;
  int64_t total_numel_;
};
}  // namespace details
}  // namespace framework
}  // namespace paddle
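The fusion pattern in FuseVarsOpHandle::RunImpl — one contiguous allocation that every output variable then aliases through ShareDataWith(Slice(s, s + numel)) — can be sketched without any Paddle dependencies. A minimal standalone illustration (hypothetical names, plain pointers standing in for LoDTensor views):

#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

int main() {
  // Per-variable element counts, as in inputs_numel_.
  std::map<std::string, int64_t> inputs_numel = {{"w0", 3}, {"w1", 2}};
  int64_t total = 0;
  for (const auto &p : inputs_numel) total += p.second;  // total_numel_

  std::vector<float> fused(total);           // the single fused buffer
  std::map<std::string, float *> views;      // each variable aliases a slice
  int64_t offset = 0;
  for (const auto &p : inputs_numel) {
    views[p.first] = fused.data() + offset;  // no copy: shared storage
    offset += p.second;
  }

  views["w1"][0] = 42.f;                     // writes land in the fused buffer
  std::printf("fused[3] = %g\n", fused[3]);  // prints fused[3] = 42
}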
File diff suppressed because it is too large.
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
@@ -0,0 +1,38 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/elementwise/elementwise_floordiv_op.h"
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"

namespace paddle {
namespace operators {
class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
 protected:
  std::string GetName() const override { return "FloorDiv"; }
  std::string GetEquation() const override { return "Out = X // Y"; }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_WITHOUT_GRADIENT(elementwise_floordiv, ops::ElementwiseOp,
                             ops::ElementwiseFloorDivOpMaker);

REGISTER_OP_CPU_KERNEL(
    elementwise_floordiv,
    ops::ElementwiseFloorDivKernel<paddle::platform::CPUDeviceContext, int>,
    ops::ElementwiseFloorDivKernel<paddle::platform::CPUDeviceContext,
                                   int64_t>);
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cu
@@ -0,0 +1,23 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_floordiv_op.h"
#include "paddle/fluid/platform/float16.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(
    elementwise_floordiv,
    ops::ElementwiseFloorDivKernel<plat::CUDADeviceContext, int>,
    ops::ElementwiseFloorDivKernel<plat::CUDADeviceContext, int64_t>);
paddle/fluid/operators/elementwise/elementwise_floordiv_op.h
@@ -0,0 +1,55 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"

namespace paddle {
namespace operators {

template <typename T>
struct FloorDivFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a / b; }
};

template <typename DeviceContext, typename T>
void elementwise_floor_div(const framework::ExecutionContext &ctx,
                           const framework::Tensor *x,
                           const framework::Tensor *y, framework::Tensor *z) {
  int axis = ctx.Attr<int>("axis");
  ElementwiseComputeEx<FloorDivFunctor<T>, DeviceContext, T>(
      ctx, x, y, axis, FloorDivFunctor<T>(), z);
}

template <typename DeviceContext, typename T>
class ElementwiseFloorDivKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *x = ctx.Input<framework::LoDTensor>("X");
    auto *y = ctx.Input<framework::LoDTensor>("Y");
    auto *z = ctx.Output<framework::LoDTensor>("Out");

    z->mutable_data<T>(ctx.GetPlace());

    // dtype of x and y is int64 or int32
    elementwise_floor_div<DeviceContext, T>(ctx, x, y, z);
  }
};

}  // namespace operators
}  // namespace paddle
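One semantic caveat worth noting: FloorDivFunctor uses C++ integer division, which truncates toward zero, while the advertised equation "Out = X // Y" suggests Python-style floor division. The two agree for operands of the same sign but differ when signs are mixed, as this standalone check (not part of the commit) shows:

#include <cstdio>

int main() {
  std::printf("7 / 2  = %d\n", 7 / 2);   // 3  (truncation and floor agree)
  std::printf("-7 / 2 = %d\n", -7 / 2);  // -3 (true floor division gives -4)
}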
paddle/fluid/operators/elementwise/elementwise_mod_op.cc
@@ -0,0 +1,36 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h"
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"

namespace paddle {
namespace operators {
class ElementwiseModOpMaker : public ElementwiseOpMaker {
 protected:
  std::string GetName() const override { return "Mod"; }
  std::string GetEquation() const override { return "Out = X % Y"; }
};
}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(elementwise_mod, ops::ElementwiseOp,
                             ops::ElementwiseModOpMaker);

REGISTER_OP_CPU_KERNEL(
    elementwise_mod,
    ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, int>,
    ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, int64_t>);
paddle/fluid/operators/elementwise/elementwise_mod_op.cu
@@ -0,0 +1,22 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h"
#include "paddle/fluid/platform/float16.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(
    elementwise_mod, ops::ElementwiseModKernel<plat::CUDADeviceContext, int>,
    ops::ElementwiseModKernel<plat::CUDADeviceContext, int64_t>);
paddle/fluid/operators/elementwise/elementwise_mod_op.h
@@ -0,0 +1,55 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/math/blas.h"

namespace paddle {
namespace operators {

template <typename T>
struct ModFunctor {
  inline HOSTDEVICE T operator()(T a, T b) const { return a % b; }
};

template <typename DeviceContext, typename T>
void elementwise_mod(const framework::ExecutionContext &ctx,
                     const framework::Tensor *x, const framework::Tensor *y,
                     framework::Tensor *z) {
  int axis = ctx.Attr<int>("axis");
  ElementwiseComputeEx<ModFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
                                                        ModFunctor<T>(), z);
}

template <typename DeviceContext, typename T>
class ElementwiseModKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext &ctx) const override {
    auto *x = ctx.Input<framework::LoDTensor>("X");
    auto *y = ctx.Input<framework::LoDTensor>("Y");
    auto *z = ctx.Output<framework::LoDTensor>("Out");

    z->mutable_data<T>(ctx.GetPlace());

    // dtype of x and y is int64 or int32
    elementwise_mod<DeviceContext, T>(ctx, x, y, z);
  }
};

}  // namespace operators
}  // namespace paddle
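ModFunctor carries the analogous caveat: the C++ % operator yields a result with the sign of the dividend, whereas Python's % follows the divisor. A standalone check (not part of the commit):

#include <cstdio>

int main() {
  std::printf("7 %% 3  = %d\n", 7 % 3);    // 1
  std::printf("-7 %% 3 = %d\n", -7 % 3);   // -1 (Python's -7 % 3 gives 2)
}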
Some files were not shown because too many files have changed in this diff.