@@ -61,7 +61,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
       framework::Tensor dst_tensor;
       if (paddle::platform::is_gpu_place(tensor.holder_->place())) {
         dst_tensor.CopyFrom<CUR_TYPE>(tensor, platform::CPUPlace());
-      } else if (paddle::platform::is_gpu_place(tensor.holder_->place())) {
+      } else if (paddle::platform::is_cpu_place(tensor.holder_->place())) {
         dst_tensor = tensor;
       }
       return py::buffer_info(
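The hunk above fixes a copy-paste error in CastToPyBufferImpl: the second branch repeated the is_gpu_place check, so it could never fire, and a CPU-resident tensor left dst_tensor default-constructed. A minimal standalone sketch (not Paddle code) of the same dead-branch pattern and its fix:

#include <iostream>

enum class Place { kCPU, kGPU };

int main() {
  Place place = Place::kCPU;

  // Old logic: the else-if repeats the if condition, so it is unreachable
  // and nothing happens for a CPU-placed input.
  if (place == Place::kGPU) {
    std::cout << "copy GPU tensor to host\n";
  } else if (place == Place::kGPU) {  // never taken
    std::cout << "alias CPU tensor\n";
  }

  // Fixed logic: the else-if tests for the CPU place instead.
  if (place == Place::kGPU) {
    std::cout << "copy GPU tensor to host\n";
  } else if (place == Place::kCPU) {
    std::cout << "alias CPU tensor\n";  // taken for CPU input
  }
  return 0;
}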
@@ -84,9 +84,10 @@ inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
 }

 template <typename T>
-void PyTensorSetFromArray(
+void PyCPUTensorSetFromArray(
     framework::Tensor &self,
-    py::array_t<T, py::array::c_style | py::array::forcecast> array) {
+    py::array_t<T, py::array::c_style | py::array::forcecast> array,
+    paddle::platform::CPUPlace &place) {
   std::vector<int> dims;
   dims.reserve(array.ndim());
   for (size_t i = 0; i < array.ndim(); ++i) {
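This hunk renames the generic setter to PyCPUTensorSetFromArray and adds an explicit CPUPlace parameter, so the destination place is chosen by the caller instead of being read back from self.place(); the matching CUDA setter is added in the next hunk. A hypothetical sketch of how the binding layer could expose the CPU setter; the real .def() calls live in the module setup (paddle/pybind/pybind.cc), which this diff does not touch, and `tensor` is an assumed py::class_ binding for framework::Tensor:

#include <pybind11/pybind11.h>
namespace py = pybind11;

// Sketch only: register the free function as a method; pybind11 accepts a
// free function whose first parameter is the bound class, and dispatches
// among "set" overloads on the place argument's type.
void BindCPUTensorSet(py::class_<framework::Tensor> &tensor) {
  tensor.def("set", PyCPUTensorSetFromArray<float>)
      .def("set", PyCPUTensorSetFromArray<int>);
}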
@@ -94,18 +95,25 @@ void PyTensorSetFromArray(
   }

   self.Resize(framework::make_ddim(dims));
-  auto *dst = self.mutable_data<T>(self.place());
-
-  if (paddle::platform::is_cpu_place(self.place())) {
-    std::memcpy(dst, array.data(), sizeof(T) * array.size());
-  } else if (paddle::platform::is_gpu_place(self.place())) {
-#ifdef PADDLE_ONLY_CPU
-    PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
-#else
-    platform::GpuMemcpySync(
-        dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice);
-#endif
-  }
+  auto *dst = self.mutable_data<T>(place);
+  std::memcpy(dst, array.data(), sizeof(T) * array.size());
 }

+template <typename T>
+void PyCUDATensorSetFromArray(
+    framework::Tensor &self,
+    py::array_t<T, py::array::c_style | py::array::forcecast> array,
+    paddle::platform::GPUPlace &place) {
+  std::vector<int> dims;
+  dims.reserve(array.ndim());
+  for (size_t i = 0; i < array.ndim(); ++i) {
+    dims.push_back((int)array.shape()[i]);
+  }
+
+  self.Resize(framework::make_ddim(dims));
+  auto *dst = self.mutable_data<T>(place);
+  paddle::platform::GpuMemcpySync(
+      dst, array.data(), sizeof(T) * array.size(), cudaMemcpyHostToDevice);
+}
+
 }  // namespace pybind
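Splitting the setter by place removes both the runtime branch on self.place() and the PADDLE_ONLY_CPU #ifdef from the function body: the CPU version can use a plain std::memcpy because source and destination are both host memory, while the CUDA version must go through paddle::platform::GpuMemcpySync (presumably a thin synchronous wrapper over cudaMemcpy with cudaMemcpyHostToDevice), since std::memcpy cannot write into device memory. A hypothetical C++ caller sketch, not part of the diff; it assumes GPUPlace can be constructed with a device index:

// Sketch only: set the same numpy array into a CPU tensor and, when built
// with CUDA, into a GPU tensor.
void SetFromNumpy(
    py::array_t<float, py::array::c_style | py::array::forcecast> array) {
  framework::Tensor cpu_tensor;
  paddle::platform::CPUPlace cpu_place;
  // Host-to-host copy inside the setter (std::memcpy).
  PyCPUTensorSetFromArray<float>(cpu_tensor, array, cpu_place);

#ifndef PADDLE_ONLY_CPU
  framework::Tensor gpu_tensor;
  paddle::platform::GPUPlace gpu_place(0);  // assumed: device 0
  // Host-to-device copy inside the setter (GpuMemcpySync).
  PyCUDATensorSetFromArray<float>(gpu_tensor, array, gpu_place);
#endif
}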