Add collective ops (reduce) (#26340)

paddle/fluid/operators/collective/c_reduce_max_op.cc
@@ -0,0 +1,39 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace paddle {
namespace operators {

class CReduceMaxOpMaker : public CReduceOpMaker {
 protected:
  std::string GetName() const override { return "Max"; }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_WITHOUT_GRADIENT(c_reduce_max, ops::CReduceOp,
                             ops::CReduceMaxOpMaker);

REGISTER_OP_CPU_KERNEL(c_reduce_max,
                       ops::CReduceOpCPUKernel<ops::kRedMax, float>,
                       ops::CReduceOpCPUKernel<ops::kRedMax, double>,
                       ops::CReduceOpCPUKernel<ops::kRedMax, int>,
                       ops::CReduceOpCPUKernel<ops::kRedMax, int64_t>,
                       ops::CReduceOpCPUKernel<ops::kRedMax, plat::float16>);
paddle/fluid/operators/collective/c_reduce_max_op.cu.cc
@@ -0,0 +1,25 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(c_reduce_max,
                        ops::CReduceOpCUDAKernel<ops::kRedMax, float>,
                        ops::CReduceOpCUDAKernel<ops::kRedMax, double>,
                        ops::CReduceOpCUDAKernel<ops::kRedMax, int>,
                        ops::CReduceOpCUDAKernel<ops::kRedMax, int64_t>,
                        ops::CReduceOpCUDAKernel<ops::kRedMax, plat::float16>);
paddle/fluid/operators/collective/c_reduce_min_op.cc
@@ -0,0 +1,39 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace paddle {
namespace operators {

class CReduceMinOpMaker : public CReduceOpMaker {
 protected:
  std::string GetName() const override { return "Min"; }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_WITHOUT_GRADIENT(c_reduce_min, ops::CReduceOp,
                             ops::CReduceMinOpMaker);

REGISTER_OP_CPU_KERNEL(c_reduce_min,
                       ops::CReduceOpCPUKernel<ops::kRedMin, float>,
                       ops::CReduceOpCPUKernel<ops::kRedMin, double>,
                       ops::CReduceOpCPUKernel<ops::kRedMin, int>,
                       ops::CReduceOpCPUKernel<ops::kRedMin, int64_t>,
                       ops::CReduceOpCPUKernel<ops::kRedMin, plat::float16>);
paddle/fluid/operators/collective/c_reduce_min_op.cu.cc
@@ -0,0 +1,25 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(c_reduce_min,
                        ops::CReduceOpCUDAKernel<ops::kRedMin, float>,
                        ops::CReduceOpCUDAKernel<ops::kRedMin, double>,
                        ops::CReduceOpCUDAKernel<ops::kRedMin, int>,
                        ops::CReduceOpCUDAKernel<ops::kRedMin, int64_t>,
                        ops::CReduceOpCUDAKernel<ops::kRedMin, plat::float16>);
paddle/fluid/operators/collective/c_reduce_op.h
@@ -0,0 +1,151 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <algorithm>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"

#if defined(PADDLE_WITH_NCCL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif

namespace paddle {
namespace operators {

enum ReduceType { kRedSum, kRedMax, kRedMin, kRedProd };

class CReduceOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
  }
};

template <ReduceType red_type, typename T>
class CReduceOpCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE_EQ(
        true, false,
        platform::errors::Unavailable(
            "CReduceOpCPUKernel is not implemented yet."));
  }
};

template <ReduceType red_type, typename T>
class CReduceOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_NCCL)
    auto in = ctx.Input<framework::Tensor>("X");
    auto out = ctx.Output<framework::Tensor>("Out");

    auto place = ctx.GetPlace();
    ncclDataType_t dtype = platform::ToNCCLDataType(in->type());
    int64_t numel = in->numel();
    const void* sendbuff = in->data<void>();
    out->Resize(in->dims());
    void* recvbuff = out->mutable_data<T>(place);

    int rid = ctx.Attr<int>("ring_id");
    int root = ctx.Attr<int>("root_id");
    auto comm = platform::NCCLCommContext::Instance().Get(rid, place);

    cudaStream_t stream = nullptr;
    if (ctx.Attr<bool>("use_calc_stream")) {
      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
      stream = static_cast<platform::CUDADeviceContext*>(dev_ctx)->stream();
    } else {
      stream = comm->stream();
    }

    ncclRedOp_t nccl_red_type = ncclSum;
    switch (red_type) {
      case kRedSum:
        nccl_red_type = ncclSum;
        break;

      case kRedMax:
        nccl_red_type = ncclMax;
        break;

      case kRedMin:
        nccl_red_type = ncclMin;
        break;

      case kRedProd:
        nccl_red_type = ncclProd;
        break;

      default:
        PADDLE_ENFORCE_EQ(true, false, platform::errors::InvalidArgument(
                                           "red_type must be one of kRedSum, "
                                           "kRedMax, kRedMin, kRedProd."));
    }

    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclReduce(
        sendbuff, recvbuff, numel, dtype, nccl_red_type, root, comm->comm(),
        stream));
#else
    PADDLE_ENFORCE_EQ(true, false,
                      platform::errors::Unavailable(
                          "PaddlePaddle should be compiled with GPU."));
#endif
  }
};

class CReduceOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() {
    AddInput("X", "(Tensor) tensor to be reduced.");
    AddOutput("Out", "(Tensor) the reduced result.");
    AddAttr<int>("ring_id", "(int default 0) communication ring id.")
        .SetDefault(0);
    AddAttr<int>("root_id", "(int default 0) root id.").SetDefault(0);
    AddAttr<bool>(
        "use_calc_stream",
        "(bool default false) run CUDA operations on the calculation stream.")
        .SetDefault(false);
    AddComment(string::Sprintf(R"DOC(
CReduce %s Operator

Call collective Reduce with reduce type %s. If input and output are
the same variable, in-place reduce will be used.
)DOC",
                               GetName(), GetName()));
  }

 protected:
  virtual std::string GetName() const = 0;
};

}  // namespace operators
}  // namespace paddle
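
Note: the CUDA kernel above maps red_type to the matching ncclRedOp_t and
calls ncclReduce, so the reduced tensor lands only on the rank whose id equals
root_id; NCCL leaves the receive buffers of the other ranks untouched. A
minimal numpy sketch of that semantics (illustrative only, not part of this
patch; simulate_c_reduce is a hypothetical helper):

import numpy as np

def simulate_c_reduce(rank_tensors, root_id, red_type="sum"):
    # Elementwise combine across ranks, mirroring ncclSum/Max/Min/Prod.
    combine = {"sum": np.add, "max": np.maximum,
               "min": np.minimum, "prod": np.multiply}[red_type]
    reduced = rank_tensors[0]
    for t in rank_tensors[1:]:
        reduced = combine(reduced, t)
    # Only the root rank observes the reduction; non-root outputs are
    # modeled here as the unchanged inputs.
    return [reduced if r == root_id else t
            for r, t in enumerate(rank_tensors)]

inputs = [np.full((2, 2), r + 1.0) for r in range(2)]  # rank r holds r + 1
print(simulate_c_reduce(inputs, root_id=1)[1])  # root sees [[3. 3.] [3. 3.]]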
paddle/fluid/operators/collective/c_reduce_prod_op.cc
@@ -0,0 +1,39 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace paddle {
namespace operators {

class CReduceProdOpMaker : public CReduceOpMaker {
 protected:
  std::string GetName() const override { return "Prod"; }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_WITHOUT_GRADIENT(c_reduce_prod, ops::CReduceOp,
                             ops::CReduceProdOpMaker);

REGISTER_OP_CPU_KERNEL(c_reduce_prod,
                       ops::CReduceOpCPUKernel<ops::kRedProd, float>,
                       ops::CReduceOpCPUKernel<ops::kRedProd, double>,
                       ops::CReduceOpCPUKernel<ops::kRedProd, int>,
                       ops::CReduceOpCPUKernel<ops::kRedProd, int64_t>,
                       ops::CReduceOpCPUKernel<ops::kRedProd, plat::float16>);
paddle/fluid/operators/collective/c_reduce_prod_op.cu.cc
@@ -0,0 +1,25 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(c_reduce_prod,
                        ops::CReduceOpCUDAKernel<ops::kRedProd, float>,
                        ops::CReduceOpCUDAKernel<ops::kRedProd, double>,
                        ops::CReduceOpCUDAKernel<ops::kRedProd, int>,
                        ops::CReduceOpCUDAKernel<ops::kRedProd, int64_t>,
                        ops::CReduceOpCUDAKernel<ops::kRedProd, plat::float16>);
paddle/fluid/operators/collective/c_reduce_sum_op.cc
@@ -0,0 +1,39 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace paddle {
namespace operators {

class CReduceSumOpMaker : public CReduceOpMaker {
 protected:
  std::string GetName() const override { return "Sum"; }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_WITHOUT_GRADIENT(c_reduce_sum, ops::CReduceOp,
                             ops::CReduceSumOpMaker);

REGISTER_OP_CPU_KERNEL(c_reduce_sum,
                       ops::CReduceOpCPUKernel<ops::kRedSum, float>,
                       ops::CReduceOpCPUKernel<ops::kRedSum, double>,
                       ops::CReduceOpCPUKernel<ops::kRedSum, int>,
                       ops::CReduceOpCPUKernel<ops::kRedSum, int64_t>,
                       ops::CReduceOpCPUKernel<ops::kRedSum, plat::float16>);
paddle/fluid/operators/collective/c_reduce_sum_op.cu.cc
@@ -0,0 +1,25 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_reduce_op.h"

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(c_reduce_sum,
                        ops::CReduceOpCUDAKernel<ops::kRedSum, float>,
                        ops::CReduceOpCUDAKernel<ops::kRedSum, double>,
                        ops::CReduceOpCUDAKernel<ops::kRedSum, int>,
                        ops::CReduceOpCUDAKernel<ops::kRedSum, int64_t>,
                        ops::CReduceOpCUDAKernel<ops::kRedSum, plat::float16>);
paddle/fluid/operators/collective/c_scatter_op.cc
@@ -0,0 +1,92 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_scatter_op.h"

namespace paddle {
namespace operators {

class CScatterOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "CScatter");
    OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "CScatter");
    int root_id = ctx->Attrs().Get<int>("root");
    int ring_id = ctx->Attrs().Get<int>("ring_id");
    int nranks = ctx->Attrs().Get<int>("nranks");
    PADDLE_ENFORCE_GE(nranks, 2,
                      platform::errors::InvalidArgument(
                          "The number of ranks (%d) must be greater than 1 "
                          "to use collective op (c_scatter op).",
                          nranks));
    PADDLE_ENFORCE_GE(
        root_id, 0,
        platform::errors::InvalidArgument(
            "The root_id (%d) for c_scatter_op must be non-negative.",
            root_id));
    PADDLE_ENFORCE_GE(
        ring_id, 0,
        platform::errors::InvalidArgument(
            "The ring_id (%d) for c_scatter_op must be non-negative.",
            ring_id));
    framework::DDim dim = ctx->GetInputDim("X");
    dim[0] = dim[0] / nranks;
    if (dim[0] < 0) dim[0] = -1;
    ctx->SetOutputDim("Out", dim);
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(
        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
  }
};

class CScatterOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() {
    AddInput("X", "(Tensor) tensor to be scattered.");
    AddOutput("Out", "(Tensor) the result of scatter.");
    AddAttr<int>("ring_id", "(int default 0) nccl communication ring id.")
        .SetDefault(0);
    AddAttr<int>("root", "(int default 0) root id for scattering.")
        .SetDefault(0);
    AddAttr<int>("nranks", "(int default 0) number of ranks.").SetDefault(0);
    AddAttr<bool>(
        "use_calc_stream",
        "(bool default false) run CUDA operations on the calculation stream.")
        .SetDefault(false);
    AddComment(R"DOC(
CScatter Operator

Scatter the source tensor on the root rank to all participants.
)DOC");
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_WITHOUT_GRADIENT(c_scatter, ops::CScatterOp, ops::CScatterOpMaker);

REGISTER_OP_CPU_KERNEL(c_scatter, ops::CScatterOpCPUKernel<float>,
                       ops::CScatterOpCPUKernel<double>,
                       ops::CScatterOpCPUKernel<int>,
                       ops::CScatterOpCPUKernel<int64_t>,
                       ops::CScatterOpCPUKernel<plat::float16>);
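
Note: InferShape in CScatterOp splits the 0-th dimension of X evenly across
nranks, which is why the op rejects nranks < 2 and makes the caller set nranks
explicitly (the attribute defaults to 0, which fails that check). A small
sketch of the shape rule (hypothetical helper, for illustration only):

def c_scatter_out_shape(x_shape, nranks):
    out = list(x_shape)
    out[0] = out[0] // nranks
    if out[0] < 0:  # a dynamic (-1) leading dimension stays dynamic
        out[0] = -1
    return out

assert c_scatter_out_shape([10, 1000], 2) == [5, 1000]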
paddle/fluid/operators/collective/c_scatter_op.cu.cc
@@ -0,0 +1,101 @@
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/collective/c_scatter_op.h"

#if defined(PADDLE_WITH_NCCL)
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/nccl_helper.h"
#endif

namespace paddle {
namespace operators {

template <typename T>
class CScatterOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
#if defined(PADDLE_WITH_NCCL)
    auto x = ctx.Input<framework::LoDTensor>("X");
    auto out = ctx.Output<framework::LoDTensor>("Out");
    int numel = x->numel();
    ncclDataType_t dtype = platform::ToNCCLDataType(x->type());

    int nranks = ctx.Attr<int>("nranks");
    int root_id = ctx.Attr<int>("root");
    int ring_id = ctx.Attr<int>("ring_id");
    auto place = ctx.GetPlace();
    auto comm = platform::NCCLCommContext::Instance().Get(ring_id, place);
    PADDLE_ENFORCE_EQ(nranks, comm->nranks(),
                      platform::errors::InvalidArgument(
                          "The number of ranks (%d) you set must "
                          "be equal to comm->nranks (%d).",
                          nranks, comm->nranks()));
    PADDLE_ENFORCE_GE(
        root_id, 0,
        platform::errors::InvalidArgument(
            "The root_id (%d) for c_scatter_op must be non-negative.",
            root_id));
    PADDLE_ENFORCE_GE(
        ring_id, 0,
        platform::errors::InvalidArgument(
            "The ring_id (%d) for c_scatter_op must be non-negative.",
            ring_id));

    cudaStream_t stream = nullptr;
    if (ctx.Attr<bool>("use_calc_stream")) {
      auto dev_ctx = platform::DeviceContextPool::Instance().Get(place);
      stream = static_cast<platform::CUDADeviceContext*>(dev_ctx)->stream();
    } else {
      stream = comm->stream();
    }

    framework::DDim x_dims = x->dims();
    framework::DDim out_dims(x_dims);
    framework::Tensor temp;
    auto in_data_ptr = x->data<T>();
    PADDLE_ENFORCE_CUDA_SUCCESS(platform::dynload::ncclBroadcast(
        reinterpret_cast<const void*>(in_data_ptr),
        temp.mutable_data<T>(out_dims, place), numel, dtype, root_id,
        comm->comm(), stream));
    VLOG(3) << "rank " << comm->rank() << " invoke Scatter.";

    out_dims[0] = out_dims[0] / nranks;
    auto start_index = out_dims[0] * comm->rank();
    auto end_index = start_index + out_dims[0];
    temp = temp.Slice(start_index, end_index);
    temp.Resize(out_dims);
    out->mutable_data<T>(out_dims, place);
    framework::TensorCopySync(*static_cast<const framework::Tensor*>(&temp),
                              place, static_cast<framework::Tensor*>(out));
    out->Resize(out_dims);
#else
    PADDLE_ENFORCE_EQ(true, false,
                      platform::errors::Unavailable(
                          "PaddlePaddle should be compiled with GPU."));
#endif
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
namespace plat = paddle::platform;

REGISTER_OP_CUDA_KERNEL(c_scatter, ops::CScatterOpCUDAKernel<float>,
                        ops::CScatterOpCUDAKernel<double>,
                        ops::CScatterOpCUDAKernel<int>,
                        ops::CScatterOpCUDAKernel<int64_t>,
                        ops::CScatterOpCUDAKernel<plat::float16>);
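
Note: NCCL offers no scatter collective, so the CUDA kernel above emulates
one: it broadcasts the full tensor from root_id into a temporary buffer on
every rank, then copies out only that rank's row block. In numpy terms
(illustrative sketch; simulate_c_scatter is a hypothetical helper):

import numpy as np

def simulate_c_scatter(x, rank, nranks):
    full = x.copy()  # stands in for the broadcast temp buffer on this rank
    rows = full.shape[0] // nranks
    return full[rank * rows:(rank + 1) * rows]

x = np.arange(8.0).reshape(4, 2)
print(simulate_c_scatter(x, rank=1, nranks=2))  # rows 2 and 3 of x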
paddle/fluid/operators/collective/c_scatter_op.h
@@ -0,0 +1,39 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <algorithm>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

template <typename T>
class CScatterOpCPUKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE_EQ(true, false,
                      platform::errors::Unavailable(
                          "CScatterOp is not implemented on CPU yet."));
  }
};

}  // namespace operators
}  // namespace paddle
				
			||||
@ -0,0 +1,70 @@
 | 
				
			||||
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 | 
				
			||||
#
 | 
				
			||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
				
			||||
# you may not use this file except in compliance with the License.
 | 
				
			||||
# You may obtain a copy of the License at
 | 
				
			||||
#
 | 
				
			||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
				
			||||
#
 | 
				
			||||
# Unless required by applicable law or agreed to in writing, software
 | 
				
			||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
				
			||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||
# See the License for the specific language governing permissions and
 | 
				
			||||
# limitations under the License.
 | 
				
			||||
 | 
				
			||||
from __future__ import print_function
 | 
				
			||||
 | 
				
			||||
import numpy as np
 | 
				
			||||
import argparse
 | 
				
			||||
import os
 | 
				
			||||
import sys
 | 
				
			||||
import signal
 | 
				
			||||
import time
 | 
				
			||||
import socket
 | 
				
			||||
from contextlib import closing
 | 
				
			||||
from six import string_types
 | 
				
			||||
import math
 | 
				
			||||
import paddle
 | 
				
			||||
import paddle.fluid as fluid
 | 
				
			||||
import paddle.fluid.profiler as profiler
 | 
				
			||||
import paddle.fluid.unique_name as nameGen
 | 
				
			||||
from paddle.fluid import core
 | 
				
			||||
import unittest
 | 
				
			||||
from multiprocessing import Process
 | 
				
			||||
import paddle.fluid.layers as layers
 | 
				
			||||
from functools import reduce
 | 
				
			||||
from test_collective_base import TestCollectiveRunnerBase, runtime_main
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
class TestCollectiveReduce(TestCollectiveRunnerBase):
 | 
				
			||||
    def __init__(self):
 | 
				
			||||
        self.global_ring_id = 0
 | 
				
			||||
 | 
				
			||||
    def get_model(self, main_prog, startup_program):
 | 
				
			||||
        ring_id = 0
 | 
				
			||||
        rootid = 1
 | 
				
			||||
        with fluid.program_guard(main_prog, startup_program):
 | 
				
			||||
            tindata = layers.data(
 | 
				
			||||
                name="tindata", shape=[10, 1000], dtype='float32')
 | 
				
			||||
            toutdata = main_prog.current_block().create_var(
 | 
				
			||||
                name="outofreduce",
 | 
				
			||||
                dtype='float32',
 | 
				
			||||
                type=core.VarDesc.VarType.LOD_TENSOR,
 | 
				
			||||
                persistable=False,
 | 
				
			||||
                stop_gradient=False)
 | 
				
			||||
            main_prog.global_block().append_op(
 | 
				
			||||
                type="c_reduce_sum",
 | 
				
			||||
                inputs={'X': tindata},
 | 
				
			||||
                attrs={'ring_id': ring_id,
 | 
				
			||||
                       'root_id': rootid},
 | 
				
			||||
                outputs={'Out': toutdata})
 | 
				
			||||
            main_prog.global_block().append_op(
 | 
				
			||||
                type="c_sync_comm_stream",
 | 
				
			||||
                inputs={'X': toutdata},
 | 
				
			||||
                outputs={'Out': toutdata},
 | 
				
			||||
                attrs={'ring_id': ring_id})
 | 
				
			||||
            return toutdata
 | 
				
			||||
 | 
				
			||||
 | 
				
			||||
if __name__ == "__main__":
 | 
				
			||||
    runtime_main(TestCollectiveReduce, "reduce", 0)
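
Note: with two ranks and rootid = 1, rank 1's "outofreduce" should equal the
elementwise sum of both ranks' tindata once the comm stream has been
synchronized. A hedged sketch of that expectation (assumed from the op
semantics, not taken from the test harness):

import numpy as np
rank0 = np.random.rand(10, 1000).astype('float32')
rank1 = np.random.rand(10, 1000).astype('float32')
expected_on_root = rank0 + rank1  # what rank 1 should hold after c_reduce_sum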
python/paddle/fluid/tests/unittests/collective_reduce_op_calc_stream.py
@@ -0,0 +1,73 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_base import TestCollectiveRunnerBase, runtime_main


class TestCollectiveReduce(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        rootid = 1
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofreduce",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(
                type="c_reduce_sum",
                inputs={'X': tindata},
                attrs={
                    'ring_id': ring_id,
                    'use_calc_stream': True,
                    'root_id': rootid
                },
                outputs={'Out': toutdata})
            main_prog.global_block().append_op(
                type="c_sync_comm_stream",
                inputs={'X': toutdata},
                outputs={'Out': toutdata},
                attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveReduce, "reduce", 0)
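
Note: the only functional difference from collective_reduce_op.py is the
'use_calc_stream': True attribute, which runs the reduce on the computation
stream instead of the communication stream, so its result is ordered with
ordinary kernels and the trailing c_sync_comm_stream has nothing left to wait
for. Side by side (illustrative):

attrs_comm_stream = {'ring_id': 0, 'root_id': 1}
attrs_calc_stream = {'ring_id': 0, 'root_id': 1, 'use_calc_stream': True}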
python/paddle/fluid/tests/unittests/collective_scatter_op.py
@@ -0,0 +1,71 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import numpy as np
import argparse
import os
import sys
import signal
import time
import socket
from contextlib import closing
from six import string_types
import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle.fluid.unique_name as nameGen
from paddle.fluid import core
import unittest
from multiprocessing import Process
import paddle.fluid.layers as layers
from functools import reduce
from test_collective_base import TestCollectiveRunnerBase, runtime_main


class TestCollectiveScatter(TestCollectiveRunnerBase):
    def __init__(self):
        self.global_ring_id = 0

    def get_model(self, main_prog, startup_program):
        ring_id = 0
        rootid = 1
        with fluid.program_guard(main_prog, startup_program):
            tindata = layers.data(
                name="tindata", shape=[10, 1000], dtype='float32')
            toutdata = main_prog.current_block().create_var(
                name="outofscatter",
                dtype='float32',
                type=core.VarDesc.VarType.LOD_TENSOR,
                persistable=False,
                stop_gradient=False)
            main_prog.global_block().append_op(
                type="c_scatter",
                inputs={'X': tindata},
                attrs={'ring_id': ring_id,
                       'root': rootid,
                       'nranks': 2},
                outputs={'Out': toutdata})
            main_prog.global_block().append_op(
                type="c_sync_comm_stream",
                inputs={'X': toutdata},
                outputs={'Out': toutdata},
                attrs={'ring_id': ring_id})
            return toutdata


if __name__ == "__main__":
    runtime_main(TestCollectiveScatter, "scatter", 0)
python/paddle/fluid/tests/unittests/test_collective_reduce.py
@@ -0,0 +1,34 @@
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import unittest
import numpy as np

from test_collective_base import TestDistBase


class TestCReduceOp(TestDistBase):
    def _setup_config(self):
        pass

    def test_reduce(self):
        self.check_with_place("collective_reduce_op.py", "reduce")

    def test_reduce_calc_stream(self):
        self.check_with_place("collective_reduce_op_calc_stream.py", "reduce")


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/unittests/test_collective_scatter.py
@@ -0,0 +1,31 @@
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import unittest
import numpy as np

from test_collective_base import TestDistBase


class TestCScatterOp(TestDistBase):
    def _setup_config(self):
        pass

    def test_scatter(self):
        self.check_with_place("collective_scatter_op.py", "scatter")


if __name__ == '__main__':
    unittest.main()