|
|
|
@ -14,6 +14,8 @@ limitations under the License. */
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <tuple>
|
|
|
|
|
#include <vector>
|
|
|
|
|
#include "paddle/fluid/framework/lod_tensor.h"
|
|
|
|
|
#include "paddle/fluid/memory/memcpy.h"
|
|
|
|
|
#include "paddle/fluid/platform/device_context.h"
|
|
|
|
@ -208,6 +210,38 @@ void PyCUDATensorSetFromArray(
|
|
|
|
|
sizeof(uint16_t) * array.size(),
|
|
|
|
|
cudaMemcpyHostToDevice, dev_ctx->stream());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <typename T>
|
|
|
|
|
void PyCUDAPinnedTensorSetFromArray(
|
|
|
|
|
framework::Tensor &self,
|
|
|
|
|
py::array_t<T, py::array::c_style | py::array::forcecast> array,
|
|
|
|
|
const paddle::platform::CUDAPinnedPlace &place) {
|
|
|
|
|
std::vector<int64_t> dims;
|
|
|
|
|
dims.reserve(array.ndim());
|
|
|
|
|
for (size_t i = 0; i < array.ndim(); ++i) {
|
|
|
|
|
dims.push_back(static_cast<int>(array.shape()[i]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.Resize(framework::make_ddim(dims));
|
|
|
|
|
auto *dst = self.mutable_data<T>(place);
|
|
|
|
|
std::memcpy(dst, array.data(), sizeof(T) * array.size());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <>
|
|
|
|
|
void PyCUDAPinnedTensorSetFromArray(
|
|
|
|
|
framework::Tensor &self,
|
|
|
|
|
py::array_t<uint16_t, py::array::c_style | py::array::forcecast> array,
|
|
|
|
|
const paddle::platform::CUDAPinnedPlace &place) {
|
|
|
|
|
std::vector<int64_t> dims;
|
|
|
|
|
dims.reserve(array.ndim());
|
|
|
|
|
for (size_t i = 0; i < array.ndim(); ++i) {
|
|
|
|
|
dims.push_back(static_cast<int>(array.shape()[i]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
self.Resize(framework::make_ddim(dims));
|
|
|
|
|
auto *dst = self.mutable_data<platform::float16>(place);
|
|
|
|
|
std::memcpy(dst, array.data(), sizeof(uint16_t) * array.size());
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
} // namespace pybind
|
|
|
|
|