Merge branch 'windows/build' into windows/online

test=develop
7 years ago · 31de60959a
parent a8f97a8264 dfbac60398
commit 31de60959a
57 changed files with 1577 additions and 106 deletions
--- a/AUTHORS.md
+++ b/AUTHORS.md
@ -25,6 +25,7 @@
 | kexinzhao | Ke-Xin Zhao |
 | kuke | Yi-Bing Liu |
 | lcy-seso | Ying Cao |
+| cjld | Dun Liang |
 | lipeng-unisound | Peng Li |
 | liuyuan | Yuan Liu |
 | livc | Zhao Li |
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@ -349,10 +349,17 @@ function(cc_test TARGET_NAME)
    set(oneValueArgs "")
    set(multiValueArgs SRCS DEPS ARGS)
    cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+    if(WIN32)
+      list(APPEND win32_deps shlwapi)
+      if("${cc_test_DEPS};" MATCHES "python;")
+        list(REMOVE_ITEM cc_test_DEPS python)
+        list(APPEND win32_deps ${PYTHON_LIBRARIES})
+      endif()
+    endif(WIN32)
    add_executable(${TARGET_NAME} ${cc_test_SRCS})
    target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
    if(WIN32)
-      target_link_libraries(${TARGET_NAME} shlwapi)
+      target_link_libraries(${TARGET_NAME} ${win32_deps})
    endif(WIN32)
    add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
    add_test(NAME ${TARGET_NAME}
@ -679,7 +686,7 @@ function(py_test TARGET_NAME)
    set(multiValueArgs SRCS DEPS ARGS ENVS)
    cmake_parse_arguments(py_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
    add_test(NAME ${TARGET_NAME}
-             COMMAND env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
+             COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
             FLAGS_cpu_deterministic=true
             PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
             ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@ -103,6 +103,7 @@ paddle.fluid.layers.beam_search ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 's
 paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'param_attr', 'act'], varargs=None, keywords=None, defaults=(None, None))
 paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None))
+paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None))
 paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False))
 paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
 paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@ -23,7 +23,7 @@ namespace paddle {
 namespace framework {
 namespace details {

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 AllReduceOpHandle::AllReduceOpHandle(ir::Node *node,
                                     const std::vector<Scope *> &local_scopes,
                                     const std::vector<platform::Place> &places,
@ -74,7 +74,7 @@ void AllReduceOpHandle::RunImpl() {
    }

    if (platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      PADDLE_ENFORCE(nccl_ctxs_, "nccl_ctxs should not be nullptr.");
      int dtype = -1;
      size_t numel = 0;
--- a/paddle/fluid/framework/details/all_reduce_op_handle.h
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.h
@ -20,7 +20,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -29,7 +29,7 @@ namespace framework {
 namespace details {

 struct AllReduceOpHandle : public OpHandleBase {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  AllReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                    const std::vector<platform::Place> &places,
                    const platform::NCCLContextMap *ctxs);
@ -49,7 +49,7 @@ struct AllReduceOpHandle : public OpHandleBase {
 private:
  std::vector<Scope *> local_scopes_;
  std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  const platform::NCCLContextMap *nccl_ctxs_;
 #endif
 };
--- a/paddle/fluid/framework/details/broadcast_op_handle.cc
+++ b/paddle/fluid/framework/details/broadcast_op_handle.cc
@ -82,7 +82,7 @@ void BroadcastOpHandle::BroadcastOneVar(
      });
    }
  } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    VarHandle *out_handle = nullptr;
    int root_id = boost::get<platform::CUDAPlace>(in_tensor.place()).device;
    std::vector<std::function<void()>> broadcast_calls;
--- a/paddle/fluid/framework/details/broadcast_op_handle.h
+++ b/paddle/fluid/framework/details/broadcast_op_handle.h
@ -24,7 +24,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -34,7 +34,7 @@ namespace details {

 struct BroadcastOpHandle : public OpHandleBase {
 public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  BroadcastOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                    const std::vector<platform::Place> &places,
                    const platform::NCCLContextMap *nccl_ctxs)
@ -68,7 +68,7 @@ struct BroadcastOpHandle : public OpHandleBase {

  std::vector<Scope *> local_scopes_;
  std::vector<platform::Place> places_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  const platform::NCCLContextMap *nccl_ctxs_;
 #endif

--- a/paddle/fluid/framework/details/broadcast_op_handle_test.h
+++ b/paddle/fluid/framework/details/broadcast_op_handle_test.h
@ -42,7 +42,7 @@ struct TestBroadcastOpHandle {
  std::vector<std::unique_ptr<ir::Node>> nodes_;
  std::vector<p::Place> place_list_;
  bool use_gpu_;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif

@ -50,7 +50,7 @@ struct TestBroadcastOpHandle {
    for (size_t j = 0; j < ctxs_.size(); ++j) {
      ctxs_[j]->Wait();
    }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    if (nccl_ctxs_) {
      nccl_ctxs_->WaitAll();
    }
@ -60,7 +60,7 @@ struct TestBroadcastOpHandle {
  void InitCtxOnGpu(bool use_gpu) {
    use_gpu_ = use_gpu;
    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      int count = p::GetCUDADeviceCount();
      if (count <= 1) {
        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@ -84,7 +84,7 @@ struct TestBroadcastOpHandle {
        place_list_.push_back(p);
        ctxs_.emplace_back(new p::CPUDeviceContext(p));
      }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      nccl_ctxs_.reset(nullptr);
 #endif
    }
@ -106,14 +106,14 @@ struct TestBroadcastOpHandle {
    nodes_.emplace_back(
        ir::CreateNodeForTest("node0", ir::Node::Type::kOperation));
    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                         place_list_, nccl_ctxs_.get());
 #else
      PADDLE_THROW("CUDA is not support.");
 #endif
    } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_ = new BroadcastOpHandle(nodes_.back().get(), local_scopes_,
                                         place_list_, nccl_ctxs_.get());
 #else
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@ -96,7 +96,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
    const std::string &loss_var_name,
    const std::unordered_set<std::string> &param_names,
    const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const {
 #else
    const bool use_cuda) const {
@ -118,7 +118,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
      pass->Erase("local_scopes");
      pass->SetNotOwned<const std::vector<Scope *>>("local_scopes",
                                                    &local_scopes);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      platform::NCCLContextMap *nctx = use_cuda ? nccl_ctxs : nullptr;
      pass->Erase("nccl_ctxs");
      pass->SetNotOwned<platform::NCCLContextMap>("nccl_ctxs", nctx);
--- a/paddle/fluid/framework/details/build_strategy.h
+++ b/paddle/fluid/framework/details/build_strategy.h
@ -23,7 +23,7 @@
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/enforce.h"

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -98,7 +98,7 @@ struct BuildStrategy {
      const std::string &loss_var_name,
      const std::unordered_set<std::string> &param_names,
      const std::vector<Scope *> &local_scopes,
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      const bool use_cuda, platform::NCCLContextMap *nccl_ctxs) const;
 #else
      const bool use_cuda) const;
--- a/paddle/fluid/framework/details/data_balance_op_handle.cc
+++ b/paddle/fluid/framework/details/data_balance_op_handle.cc
@ -20,7 +20,7 @@ namespace paddle {
 namespace framework {
 namespace details {

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 DataBalanceOpHandle::DataBalanceOpHandle(
    ir::Node *node, const std::vector<Scope *> &local_scopes,
    const std::vector<platform::Place> &places,
--- a/paddle/fluid/framework/details/data_balance_op_handle.h
+++ b/paddle/fluid/framework/details/data_balance_op_handle.h
@ -19,7 +19,7 @@
 #include "paddle/fluid/framework/details/op_handle_base.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -29,7 +29,7 @@ namespace details {

 struct DataBalanceOpHandle : public OpHandleBase {
 public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  DataBalanceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                      const std::vector<platform::Place> &places,
                      const platform::NCCLContextMap *ctxs);
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle.h
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle.h
@ -25,7 +25,7 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -35,7 +35,7 @@ namespace details {

 struct FusedBroadcastOpHandle : public BroadcastOpHandle {
 public:
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  FusedBroadcastOpHandle(ir::Node *node,
                         const std::vector<Scope *> local_scopes,
                         const std::vector<platform::Place> &places,
--- a/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
+++ b/paddle/fluid/framework/details/fused_broadcast_op_handle_test.cc
@ -44,14 +44,14 @@ struct TestFusedBroadcastOpHandle : TestBroadcastOpHandle {
    nodes_.emplace_back(
        ir::CreateNodeForTest("fused_broadcast", ir::Node::Type::kOperation));
    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_ = new FusedBroadcastOpHandle(
          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
      PADDLE_THROW("CUDA is not supported.");
 #endif
    } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_ = new FusedBroadcastOpHandle(
          nodes_.back().get(), local_scopes_, place_list_, nccl_ctxs_.get());
 #else
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@ -142,7 +142,7 @@ void MultiDevSSAGraphBuilder::Init() const {
  places_ = Get<const std::vector<platform::Place>>(kPlaces);
  local_scopes_ = Get<const std::vector<Scope *>>(kLocalScopes);
  strategy_ = Get<const BuildStrategy>(kStrategy);
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  nccl_ctxs_ = &Get<platform::NCCLContextMap>("nccl_ctxs");
 #endif

@ -431,7 +431,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
    }
  }
  bool use_gpu = false;
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  use_gpu = nccl_ctxs_ != nullptr;
 #endif

@ -478,7 +478,7 @@ bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {

 void MultiDevSSAGraphBuilder::SetCommunicationContext(
    OpHandleBase *op_handle, const platform::Place &p) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  if (nccl_ctxs_ == nullptr) {
    op_handle->SetDeviceContext(p,
                                platform::DeviceContextPool::Instance().Get(p));
@ -492,7 +492,7 @@ void MultiDevSSAGraphBuilder::SetCommunicationContext(
 void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
                                                const std::string &p_name,
                                                size_t src_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  auto *op_handle = new BroadcastOpHandle(
      result->CreateEmptyNode("broadcast", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_);
@ -522,7 +522,7 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
 void MultiDevSSAGraphBuilder::CreateFusedBroadcastOp(
    ir::Graph *result,
    const std::vector<std::unordered_set<std::string>> &bcast_varnames) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  auto *op_handle = new FusedBroadcastOpHandle(
      result->CreateEmptyNode("fused_broadcast", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_);
@ -568,7 +568,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOp(ir::Graph *result,

 void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,
                                                const std::string &og) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  result->Get<GraphOps>(kGraphOps).emplace_back(new AllReduceOpHandle(
      result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_));
@ -597,7 +597,7 @@ void MultiDevSSAGraphBuilder::InsertAllReduceOp(ir::Graph *result,

 void MultiDevSSAGraphBuilder::InsertDataBalanceOp(
    ir::Graph *result, const std::vector<std::string> &datas) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  result->Get<GraphOps>(kGraphOps).emplace_back(new DataBalanceOpHandle(
      result->CreateEmptyNode("data_balance", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_));
@ -694,7 +694,7 @@ void MultiDevSSAGraphBuilder::CreateComputationalOps(ir::Graph *result,
 VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(ir::Graph *result,
                                                   const std::string &og,
                                                   int dst_dev_id) const {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  result->Get<GraphOps>(kGraphOps).emplace_back(new ReduceOpHandle(
      result->CreateEmptyNode("reduce", ir::Node::Type::kOperation),
      local_scopes_, places_, nccl_ctxs_));
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.h
@ -40,7 +40,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                         size_t device_id) const;
  void Init() const;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  mutable platform::NCCLContextMap *nccl_ctxs_;
 #endif

--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@ -125,7 +125,7 @@ void ReduceOpHandle::RunImpl() {
        }
      });
    } else if (paddle::platform::is_gpu_place(lod_tensors[0]->place())) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      auto pre_in = pre_in_var->Get<framework::LoDTensor>();
      VariableVisitor::ShareDimsAndLoD(*pre_in_var, out_var);
      VariableVisitor::GetMutableTensor(out_var).mutable_data(
--- a/paddle/fluid/framework/details/reduce_op_handle.h
+++ b/paddle/fluid/framework/details/reduce_op_handle.h
@ -23,7 +23,7 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/platform/device_context.h"
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
 #include "paddle/fluid/platform/nccl_helper.h"
 #endif

@ -35,7 +35,7 @@ struct ReduceOpHandle : public OpHandleBase {
  std::vector<Scope *> local_scopes_;
  std::vector<platform::Place> places_;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  const platform::NCCLContextMap *nccl_ctxs_;
  ReduceOpHandle(ir::Node *node, const std::vector<Scope *> &local_scopes,
                 const std::vector<platform::Place> &places,
--- a/paddle/fluid/framework/details/reduce_op_handle_test.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle_test.cc
@ -35,7 +35,7 @@ struct TestReduceOpHandle {
  std::vector<p::Place> gpu_list_;
  std::vector<std::unique_ptr<p::DeviceContext>> ctxs_;

-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
  std::unique_ptr<platform::NCCLContextMap> nccl_ctxs_;
 #endif

@ -43,7 +43,7 @@ struct TestReduceOpHandle {
    for (size_t j = 0; j < ctxs_.size(); ++j) {
      ctxs_[j]->Wait();
    }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
    if (nccl_ctxs_) {
      nccl_ctxs_->WaitAll();
    }
@ -53,7 +53,7 @@ struct TestReduceOpHandle {
  void InitCtxOnGpu(bool use_gpu) {
    use_gpu_ = use_gpu;
    if (use_gpu) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      int count = p::GetCUDADeviceCount();
      if (count <= 1) {
        LOG(WARNING) << "Cannot test multi-gpu Broadcast, because the CUDA "
@ -77,7 +77,7 @@ struct TestReduceOpHandle {
        gpu_list_.push_back(p);
        ctxs_.emplace_back(new p::CPUDeviceContext(p));
      }
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      nccl_ctxs_.reset(nullptr);
 #endif
    }
@ -99,14 +99,14 @@ struct TestReduceOpHandle {

    nodes.emplace_back(new ir::Node("node"));
    if (use_gpu_) {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                          gpu_list_, nccl_ctxs_.get()));
 #else
      PADDLE_THROW("CUDA is not support.");
 #endif
    } else {
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
      op_handle_.reset(new ReduceOpHandle(nodes.back().get(), local_scopes_,
                                          gpu_list_, nccl_ctxs_.get()));
 #else
--- a/paddle/fluid/framework/ir/is_test_pass_tester.cc
+++ b/paddle/fluid/framework/ir/is_test_pass_tester.cc
@ -15,7 +15,10 @@
 #include "paddle/fluid/framework/ir/is_test_pass.h"

 #include <gtest/gtest.h>
-
+#ifdef _WIN32
+#undef FALSE
+#undef TRUE
+#endif
 namespace paddle {
 namespace framework {
 namespace ir {
--- a/paddle/fluid/inference/analysis/analyzer_tester.cc
+++ b/paddle/fluid/inference/analysis/analyzer_tester.cc
@ -19,6 +19,7 @@
 #include "paddle/fluid/inference/analysis/ut_helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
+#include "paddle/fluid/platform/port.h"

 namespace paddle {
 namespace inference {
@ -75,7 +76,7 @@ void TestWord2vecPrediction(const std::string& model_path) {
                     0.000932706};
  const size_t num_elements = outputs.front().data.length() / sizeof(float);
  // The outputs' buffers are in CPU memory.
-  for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
+  for (size_t i = 0; i < std::min((size_t)5UL, num_elements); i++) {
    LOG(INFO) << "data: "
              << static_cast<float*>(outputs.front().data.data())[i];
    PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@ -15,10 +15,6 @@
 #pragma once

 #include <glog/logging.h>
-#if !defined(_WIN32)
-#include <sys/time.h>
-#else
-#endif

 #include <algorithm>
 #include <chrono>  // NOLINT
@ -28,6 +24,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/string/printf.h"

 namespace paddle {
--- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
+++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc
@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include <gflags/gflags.h>
-#include <sys/time.h>
 #include <time.h>
 #include <algorithm>
 #include <fstream>
--- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc
+++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc
@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <sys/time.h>
 #include <time.h>
 #include <fstream>
 #include <thread>  // NOLINT
--- a/paddle/fluid/inference/tests/test_helper.h
+++ b/paddle/fluid/inference/tests/test_helper.h
@ -20,6 +20,7 @@ limitations under the License. */

 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/io.h"
+#include "paddle/fluid/platform/port.h"
 #include "paddle/fluid/platform/profiler.h"

 DECLARE_bool(use_mkldnn);
--- a/Show More
+++ b/Show More