parent 0990c87bf6
commit 1e8474b9f1
@@ -1,8 +0,0 @@
import unittest, os
import numpy as np
import paddle.v2 as paddle
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
from op_test import OpTest, create_op, set_input

gpu_list = "0,1,2,3"
@@ -1,87 +0,0 @@
import unittest, os
import numpy as np
import paddle.v2 as paddle
from paddle.v2.framework.op import Operator
import paddle.v2.framework.core as core
from op_test import OpTest, create_op, set_input

# gpu_list = os.environ["NV_LIST"]
gpu_list = "0,1,2,3"

if not core.is_compile_gpu() or not gpu_list:
    exit(0)


# Reference implementation: element-wise sum of all per-device tensors,
# then broadcast the sum back so every device holds the same result.
def allreduce(tensors, gpus):
    num_device = len(gpus)
    assert (len(tensors) == num_device), "not match of tensor and device"
    Out = tensors
    for i in range(1, len(tensors)):
        Out[0] += Out[i]

    for i in range(1, len(tensors)):
        Out[i] = Out[0]

    return Out


class TestNCCLAllReduce(unittest.TestCase):
    def setUp(self):
        self.op_type = "ncclAllReduce"
        self.gpus = [int(g) for g in gpu_list.split(",")]

        self.g_scope = core.Scope()
        self.g_ctx = core.DeviceContext.create(core.CPUPlace())
        self.scopes = []
        self.ops = []
        self.places = []

        self.input_data = []
        for i in range(len(self.gpus)):
            self.input_data.append(np.random.random((32, 32)))
        # allreduce() mutates its argument, so reduce copies to keep the
        # original per-GPU inputs intact for the operator under test.
        self.output_data = allreduce([x.copy() for x in self.input_data], self.gpus)

        # Initialize the NCCL communicator once in the global scope.
        nccl_init = Operator("ncclInit", Out="Communicator", gpus=self.gpus)
        nccl_init.run(self.g_scope, self.g_ctx)

        for i in range(len(self.gpus)):
            # insert kid scope
            scope = self.g_scope.new_scope()
            place = core.GPUPlace(self.gpus[i])

            inputs = {"X": self.input_data[i]}
            outputs = {"Out": self.output_data[i]}
            attrs = {"gpus": self.gpus}

            op = create_op(scope, self.op_type, inputs, outputs, attrs)
            set_input(scope, op, inputs, place)

            self.scopes.append(scope)
            self.ops.append(op)
            self.places.append(place)

    def test_output(self):
        idx = 0
        for scope, place, op in zip(self.scopes, self.places, self.ops):
            ctx = core.DeviceContext.create(place)
            op.run(scope, ctx)

            for out_name, out_dup in Operator.get_op_outputs(op.type()):
                actual = np.array(scope.find_var(out_name).get_tensor())
                expect = self.output_data[idx]

                idx += 1
                self.assertTrue(np.allclose(actual, expect), "has diff")


# if __name__ == "__main__":
#     unittest.main()
# usage : export NV_LIST=0,1,2,3 python *.py

# os.environ["NV_LIST"] = ["0,1,2,3"]

if __name__ == "__main__":
    unittest.main()
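
For reference only (not part of the deleted file): the allreduce() helper above simulates what ncclAllReduce is expected to produce, i.e. every device ends up with the element-wise sum of all per-device tensors. A minimal standalone sketch of that semantics, using the hypothetical name reference_allreduce:

import numpy as np

def reference_allreduce(tensors):
    # element-wise sum across all "devices", then give every device a copy
    total = sum(tensors)
    return [total.copy() for _ in tensors]

data = [np.random.random((32, 32)) for _ in range(4)]
out = reference_allreduce(data)
# every entry now holds the same summed tensor
assert all(np.allclose(o, out[0]) for o in out)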
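
The commented-out lines suggest the GPU list was meant to come from an NV_LIST environment variable (usage: export NV_LIST=0,1,2,3 before running the test). A hedged sketch of that setup, with os.environ.get so an unset variable falls through to the existing empty-list guard; this is an assumption, not code from the original file:

import os

gpu_list = os.environ.get("NV_LIST", "")            # e.g. "0,1,2,3"
gpus = [int(g) for g in gpu_list.split(",") if g]   # -> [0, 1, 2, 3]
if not gpus:
    exit(0)  # mirror the test's skip-when-no-GPUs behaviour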