Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into rsqrt
commit 082822d417
@ -0,0 +1,32 @@
if(NOT WITH_GPU)
    return()
endif()

set(ANAKIN_ROOT "/usr" CACHE PATH "ANAKIN ROOT")
find_path(ANAKIN_INCLUDE_DIR anakin_config.h
    PATHS ${ANAKIN_ROOT} ${ANAKIN_ROOT}/include
          $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/include
    NO_DEFAULT_PATH
)

find_library(ANAKIN_LIBRARY NAMES libanakin_saber_common.so libanakin.so
    PATHS ${ANAKIN_ROOT}
          $ENV{ANAKIN_ROOT} $ENV{ANAKIN_ROOT}/lib
    NO_DEFAULT_PATH
    DOC "Path to ANAKIN library.")

if(ANAKIN_INCLUDE_DIR AND ANAKIN_LIBRARY)
    if(WITH_DSO)
        set(ANAKIN_FOUND ON)
    endif(WITH_DSO)
else()
    set(ANAKIN_FOUND OFF)
endif()

if(ANAKIN_FOUND)
    message(STATUS "Current ANAKIN header is ${ANAKIN_INCLUDE_DIR}/anakin_config.h. ")
    include_directories(${ANAKIN_ROOT}/include)
    include_directories(${ANAKIN_ROOT}/include/saber)
    link_directories(${ANAKIN_ROOT})
    add_definitions(-DPADDLE_WITH_ANAKIN)
endif()
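For reference, a minimal sketch (not part of this diff) of how a build might point CMake at a custom Anakin install and consume the results of this module; the target name my_inference_lib is hypothetical:

    # ANAKIN_ROOT is a CACHE PATH, and $ENV{ANAKIN_ROOT} is also searched, so either of
    #   cmake -DANAKIN_ROOT=/opt/anakin ..
    #   ANAKIN_ROOT=/opt/anakin cmake ..
    # can be used to locate a non-default install.
    if(ANAKIN_FOUND)
      # my_inference_lib is a placeholder target; the -DPADDLE_WITH_ANAKIN define and the
      # include/link directories are already added globally by the module above.
      target_link_libraries(my_inference_lib ${ANAKIN_LIBRARY})
    endif()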
@ -0,0 +1,42 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

INCLUDE(ExternalProject)

SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc")
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})

ExternalProject_Add(
    extern_dgc
    ${EXTERNAL_PROJECT_LOG_ARGS}
    GIT_REPOSITORY "https://github.com/PaddlePaddle/Fleet"
    GIT_TAG "2d04dc3800cdd0601f1b65d547dabcc60b0cf9dc"
    SOURCE_DIR "${DGC_SOURCES_DIR}"
    CONFIGURE_COMMAND ""
    BUILD_COMMAND cd collective && make -j
    INSTALL_COMMAND mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc
        && cp ${DGC_SOURCES_DIR}/collective/build/lib/libdgc.a ${DGC_LIBRARIES}
        && cp ${DGC_SOURCES_DIR}/collective/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
    BUILD_IN_SOURCE 1
)

ADD_LIBRARY(dgc SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES})
ADD_DEPENDENCIES(dgc extern_dgc)

LIST(APPEND external_project_dependencies dgc)
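For reference, a minimal sketch (an assumption, not part of this diff) of how a consumer target could use the imported dgc target defined above; my_target is a hypothetical name:

    # Depending on extern_dgc ensures libdgc.a has been built and copied into
    # ${DGC_INSTALL_DIR} before my_target links; linking the imported target dgc
    # then resolves to ${DGC_LIBRARIES}.
    add_dependencies(my_target extern_dgc)
    target_link_libraries(my_target dgc)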
@ -1,140 +0,0 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/details/var_handle.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"

namespace paddle {
namespace framework {
namespace details {

class EarlyDeleteOpHandle : public OpHandleBase {
 public:
  EarlyDeleteOpHandle(ir::Node* node, const Scope* scope,
                      const platform::Place& place,
                      const std::vector<std::string>& names,
                      GarbageCollector* gc)
      : OpHandleBase(node),
        scope_(scope),
        place_(place),
        names_(names),
        gc_(gc) {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(place);
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
    }
#endif
  }
  ~EarlyDeleteOpHandle() {
#ifdef PADDLE_WITH_CUDA
    if (IsStreamGarabageCollector()) {
      auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx_->GetPlace());
      PADDLE_ENFORCE(cudaSetDevice(gpu_place.device));
      PADDLE_ENFORCE(cudaEventDestroy(event_));
    }
#endif
  }

  std::string Name() const override { return "early_delete"; }

 protected:
  void RunImpl() override {
    std::vector<std::shared_ptr<memory::Allocation>> tensors;
    auto* local_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope*>();
    for (auto& var_name : names_) {
      auto* var = local_scope->FindVar(var_name);
      PADDLE_ENFORCE(var != nullptr,
                     string::Sprintf("Local Scope not has var %s", var_name));
      if (var->IsType<LoDTensor>()) {
        tensors.emplace_back(var->GetMutable<LoDTensor>()->MoveMemoryHolder());
      } else if (var->IsType<SelectedRows>()) {
        tensors.emplace_back(var->GetMutable<SelectedRows>()
                                 ->mutable_value()
                                 ->MoveMemoryHolder());
      } else if (var->IsType<LoDTensorArray>()) {
        LoDTensorArray* tensor_array = var->GetMutable<LoDTensorArray>();
        for (auto& tensor : *tensor_array) {
          tensors.emplace_back(tensor.MoveMemoryHolder());
        }
      }
    }
    if (!tensors.empty()) {
      ClearTensors(tensors);
    }
  }

 private:
  void ClearTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    if (platform::is_cpu_place(place_)) {
      ClearCPUTensors(tensors);
    } else {
      ClearGPUTensors(tensors);
    }
  }

  void ClearCPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
    auto* gc = dynamic_cast<CPUGarbageCollector*>(gc_);
    if (gc != nullptr) {
      gc->Add(tensors);
    }
  }

  void ClearGPUTensors(
      const std::vector<std::shared_ptr<memory::Allocation>>& tensors) {
#ifdef PADDLE_WITH_CUDA
    auto* gc = dynamic_cast<StreamGarbageCollector*>(gc_);
    if (gc != nullptr) {
      auto compute_stream = dev_ctx_->stream();
      auto callback_stream = gc->stream();
      auto callback_func = [=]() {
        PADDLE_ENFORCE(cudaEventRecord(event_, compute_stream));
        PADDLE_ENFORCE(cudaStreamWaitEvent(callback_stream, event_, 0));
      };
      gc_->Add(tensors, callback_func);
    } else {
      gc_->Add(tensors);
    }
  }

  bool IsStreamGarabageCollector() const {
    return dynamic_cast<const StreamGarbageCollector*>(gc_) != nullptr;
#endif
  }

  const Scope* scope_;
  const platform::Place place_;
  std::vector<std::string> names_;
  GarbageCollector* gc_;
#ifdef PADDLE_WITH_CUDA
  platform::CUDADeviceContext* dev_ctx_;
  cudaEvent_t event_;
#endif
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
@ -0,0 +1,66 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/details/fetch_barrier_op_handle.h"

#include <string>

namespace paddle {
namespace framework {
namespace details {
FetchBarrierOpHandle::FetchBarrierOpHandle(
    ir::Node *node, const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places)
    // The fetch_barrier op always runs on place0, but outputs on all places.
    : OpHandleBase(node),
      op_(framework::OpRegistry::CreateOp(*node->Op())),
      local_scopes_(local_scopes),
      places_(places),
      run_scope_(local_scopes[0]),
      place_(places[0]) {
  for (auto &p : places) {
    this->SetDeviceContext(p, platform::DeviceContextPool::Instance().Get(p));
  }
}

bool FetchBarrierOpHandle::IsMultiDeviceTransfer() {
  // Override IsMultiDeviceTransfer to return true.
  return true;
}

void FetchBarrierOpHandle::RunImpl() {
  WaitInputVarGenerated(place_);

  auto run_func = [this]() {
    op_->Run(*run_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
  };

  if (is_lock_and_record_event_free_) {
    run_func();
  } else {
    this->RunAndRecordEvent(run_func);
  }
}

bool FetchBarrierOpHandle::NeedWait(VarHandleBase *in_var) {
  bool need_wait =
      in_var && in_var->GeneratedOp() &&
      in_var->GeneratedOp()->DeviceContext(place_) != dev_ctxes_.at(place_);
  return need_wait;
}

std::string FetchBarrierOpHandle::Name() const { return op_->Type(); }
}  // namespace details
}  // namespace framework
}  // namespace paddle
@ -0,0 +1,61 @@
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>

#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/platform/device_context.h"

namespace paddle {
namespace framework {
namespace details {

// **NOTE**: The fetch_barrier op is special: it outputs all received variables
// on all places, so if there are multiple places it must be initialized with
// multiple dev_ctxes_.

struct FetchBarrierOpHandle : public OpHandleBase {
 public:
  FetchBarrierOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                       const std::vector<platform::Place> &places);

  bool IsMultiDeviceTransfer() override;

  std::string Name() const override;

 protected:
  void RunImpl() override;

  bool NeedWait(VarHandleBase *in_var) override;

 private:
  std::unique_ptr<OperatorBase> op_;
  std::vector<Scope *> local_scopes_;
  std::vector<platform::Place> places_;
  Scope *run_scope_;
  platform::Place place_;

  bool is_lock_and_record_event_free_{false};
};

}  // namespace details
}  // namespace framework
}  // namespace paddle
Some files were not shown because too many files have changed in this diff.