Merge pull request #9746 from typhoonzero/multigpumultinode
[Feature] Enable multi gpu distributed training of fluid
wangkuiyi-patch-2
commit
652cf43002
@ -0,0 +1,43 @@
|
|||||||
|
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#include "paddle/fluid/framework/details/send_op_handle.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
// Builds a send handle.
//
// op_desc     - description from which the wrapped operator is created via
//               OpRegistry::CreateOp; the handle owns the resulting operator.
// local_scope - scope later passed to the operator's Run(); it is stored as a
//               raw pointer and dereferenced in RunImpl(), so it must be
//               non-null and outlive this handle.
// place       - place later passed to the operator's Run().
SendOpHandle::SendOpHandle(const framework::OpDesc &op_desc,
                           const Scope *local_scope,
                           const platform::Place &place)
    : op_(framework::OpRegistry::CreateOp(op_desc)),
      local_scope_(local_scope),
      place_(place) {
}
|
||||||
|
|
||||||
|
void SendOpHandle::RunImpl() {
|
||||||
|
// Wait input done
|
||||||
|
for (auto *in : inputs_) {
|
||||||
|
auto &p = static_cast<VarHandle *>(in)->place_;
|
||||||
|
if (in->DebugString() == "dummy") { // HACK
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
in->generated_op_->Wait(dev_ctxes_[p]);
|
||||||
|
}
|
||||||
|
op_->Run(*local_scope_, place_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the fixed name identifying this kind of op handle.
std::string SendOpHandle::Name() const {
  return std::string("send");
}
|
||||||
|
} // namespace details
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
@ -0,0 +1,50 @@
|
|||||||
|
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "paddle/fluid/framework/details/op_handle_base.h"
|
||||||
|
#include "paddle/fluid/framework/lod_tensor.h"
|
||||||
|
#include "paddle/fluid/framework/op_registry.h"
|
||||||
|
#include "paddle/fluid/framework/operator.h"
|
||||||
|
#include "paddle/fluid/framework/scope.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace framework {
|
||||||
|
namespace details {
|
||||||
|
|
||||||
|
struct SendOpHandle : public OpHandleBase {
|
||||||
|
std::unique_ptr<OperatorBase> op_;
|
||||||
|
const Scope* local_scope_;
|
||||||
|
const platform::Place& place_;
|
||||||
|
|
||||||
|
SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope,
|
||||||
|
const platform::Place& place);
|
||||||
|
|
||||||
|
std::string Name() const override;
|
||||||
|
|
||||||
|
// Delay and buffer nccl_all_reduce together can significantly increase
|
||||||
|
// performance. Disable this feature by returning false.
|
||||||
|
bool IsMultiDeviceTransfer() override { return false; };
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void RunImpl() override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace details
|
||||||
|
} // namespace framework
|
||||||
|
} // namespace paddle
|
Loading…
Reference in new issue