Merge pull request #14617 from wopeizl/windows/online

Windows/online
local_add_cudnn_lstm
wopeizl 6 years ago committed by GitHub
commit db9284ecde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph.h"
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#include "paddle/fluid/platform/nccl_helper.h" #include "paddle/fluid/platform/nccl_helper.h"
#endif #endif
@ -54,7 +54,7 @@ class ParallelExecutorPrivate {
Scope *global_scope_; // not owned Scope *global_scope_; // not owned
std::unique_ptr<details::SSAGraphExecutor> executor_; std::unique_ptr<details::SSAGraphExecutor> executor_;
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_; std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
#endif #endif
bool own_local_scope_; bool own_local_scope_;
@ -104,7 +104,7 @@ ParallelExecutor::ParallelExecutor(
if (member_->use_cuda_) { if (member_->use_cuda_) {
// Bcast Parameters to all GPUs // Bcast Parameters to all GPUs
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME); auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME);
ncclUniqueId *nccl_id = nullptr; ncclUniqueId *nccl_id = nullptr;
if (nccl_id_var != nullptr) { if (nccl_id_var != nullptr) {
@ -124,7 +124,7 @@ ParallelExecutor::ParallelExecutor(
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert // Step 2. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp // ncclOp
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::unique_ptr<ir::Graph> graph = build_strategy.Apply( std::unique_ptr<ir::Graph> graph = build_strategy.Apply(
main_program, member_->places_, loss_var_name, params, main_program, member_->places_, loss_var_name, params,
member_->local_scopes_, member_->use_cuda_, member_->nccl_ctxs_.get()); member_->local_scopes_, member_->use_cuda_, member_->nccl_ctxs_.get());
@ -213,7 +213,7 @@ void ParallelExecutor::BCastParamsToDevices(
} }
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
if (paddle::platform::is_gpu_place(main_tensor.place())) { if (paddle::platform::is_gpu_place(main_tensor.place())) {
#ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std::vector<void *> buffers; std::vector<void *> buffers;
size_t numel = main_tensor.numel(); size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type()); ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());

@ -76,7 +76,8 @@ void TestWord2vecPrediction(const std::string& model_path) {
0.000932706}; 0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float); const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory. // The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) { for (size_t i = 0; i < std::min(static_cast<size_t>(5UL), num_elements);
i++) {
LOG(INFO) << "data: " LOG(INFO) << "data: "
<< static_cast<float*>(outputs.front().data.data())[i]; << static_cast<float*>(outputs.front().data.data())[i];
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],

@ -86,7 +86,11 @@ void CPUAllocator::Free(void* p, size_t size, size_t index) {
munlock(p, size); munlock(p, size);
#endif #endif
} }
#ifdef _WIN32
_aligned_free(p);
#else
free(p); free(p);
#endif
} }
bool CPUAllocator::UseGpu() const { return false; } bool CPUAllocator::UseGpu() const { return false; }

@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#ifndef _WIN32
#include <unistd.h> #include <unistd.h>
#endif
#include <string> #include <string>
#include <thread> // NOLINT #include <thread> // NOLINT

@ -16,13 +16,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/fluid/operators/math/sequence_pooling.h"
#include "paddle/fluid/platform/cuda_primitives.h" #include "paddle/fluid/platform/cuda_primitives.h"
#include "paddle/fluid/platform/macros.h"
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace math { namespace math {
#define FLT_MAX __FLT_MAX__
template <typename T> template <typename T>
struct MaxPoolFunctor { struct MaxPoolFunctor {
HOSTDEVICE void operator()(const T* input, const size_t start, HOSTDEVICE void operator()(const T* input, const size_t start,

@ -20,12 +20,12 @@ limitations under the License. */
#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/enforce.h"
#ifndef _WIN32 #ifndef _WIN32
const float fraction_of_gpu_memory_to_use = 0.92f; constexpr static float fraction_of_gpu_memory_to_use = 0.92f;
#else #else
// fraction_of_gpu_memory_to_use cannot be too high on windows, // fraction_of_gpu_memory_to_use cannot be too high on windows,
// since the win32 graphic sub-system can occupy some GPU memory // since the win32 graphic sub-system can occupy some GPU memory
// which may lead to insufficient memory left for paddle // which may lead to insufficient memory left for paddle
const float fraction_of_gpu_memory_to_use = 0.5f; constexpr static float fraction_of_gpu_memory_to_use = 0.5f;
#endif #endif
DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use, DEFINE_double(fraction_of_gpu_memory_to_use, fraction_of_gpu_memory_to_use,

@ -29,8 +29,16 @@ limitations under the License. */
namespace pybind11 { namespace pybind11 {
namespace detail { namespace detail {
#if !defined(PYBIND11_HIDDEN)
#ifdef _WIN32
#define PYBIND11_HIDDEN __declspec(dllexport)
#else
#define PYBIND11_HIDDEN __attribute__((visibility("hidden")))
#endif
#endif
// Can be replaced by a generic lambda in C++14 // Can be replaced by a generic lambda in C++14
struct __attribute__((visibility("hidden"))) paddle_variant_caster_visitor struct PYBIND11_HIDDEN paddle_variant_caster_visitor
: public boost::static_visitor<handle> { : public boost::static_visitor<handle> {
return_value_policy policy; return_value_policy policy;
handle parent; handle parent;

@ -63,7 +63,7 @@ function(py_test_modules TARGET_NAME)
set(multiValueArgs MODULES DEPS ENVS) set(multiValueArgs MODULES DEPS ENVS)
cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) cmake_parse_arguments(py_test_modules "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_test(NAME ${TARGET_NAME} add_test(NAME ${TARGET_NAME}
COMMAND env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS} COMMAND ${CMAKE_COMMAND} -E env PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_modules_ENVS}
${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES} ${PYTHON_EXECUTABLE} ${PADDLE_SOURCE_DIR}/tools/test_runner.py ${py_test_modules_MODULES}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
if (py_test_modules_SERIAL) if (py_test_modules_SERIAL)

Loading…
Cancel
Save