serialize the PaddleTensor, test=develop (#22810)

* encapsulate the PaddleTensorToLoDTensor, test=develop
* serialize the pd_tensor, test=develop
* serialize tensors to file, test=develop

Branch: revert-22710-feature/integrated_ps_api
parent 72ff5a09c3
commit 1861ca88f1

paddle/fluid/inference/utils/CMakeLists.txt
@@ -1,2 +1,4 @@
cc_library(benchmark SRCS benchmark.cc DEPS enforce)
cc_test(test_benchmark SRCS benchmark_tester.cc DEPS benchmark)
cc_library(infer_io_utils SRCS io_utils.cc DEPS paddle_inference_api lod_tensor)
cc_test(infer_io_utils_tester SRCS io_utils_tester.cc DEPS infer_io_utils)

paddle/fluid/inference/utils/io_utils.cc
@@ -0,0 +1,163 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/utils/io_utils.h"
#include <fstream>  // std::ifstream / std::ofstream used below
#include <string>
#include <vector>
#include "paddle/fluid/inference/analysis/helper.h"

namespace paddle {
namespace inference {

// =========================================================
// Item             | Type     | Bytes
// ---------------------------------------------------------
// Version          | uint32_t | 4
// ---------------------------------------------------------
// Bytes of `Name`  | uint64_t | 8
// Name             | char     | Bytes of `Name`
// ---------------------------------------------------------
// LoD Level        | uint64_t | 8
// Bytes of `LoD[0]`| uint64_t | 8
// LoD[0]           | uint64_t | Bytes of `LoD[0]`
// ...              | ...      | ...
// ---------------------------------------------------------
// Dims of `Shape`  | uint64_t | 8
// Shape            | int32_t  | Dims * 4
// ---------------------------------------------------------
// Dtype            | int32_t  | 4
// Bytes of `Data`  | uint64_t | 8
// Data             | Dtype    | Bytes of `Data`
// =========================================================
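
As a sanity check on the layout above, a back-of-the-envelope helper can predict the size of one serialized record. The helper below is not part of this patch (the name is ours) and assumes 64-bit size_t for LoD entries, matching the table. For the "Hello" tensor built in the tester (5-byte name, one LoD level of two entries, shape {1, 2}, 8 bytes of data) it gives 4 + (8+5) + 8 + (8+16) + (8+8) + (4+8+8) = 85 bytes.

#include <cstdint>
#include <vector>

// Hypothetical helper, mirroring the table row by row.
uint64_t ExpectedPDTensorBytes(uint64_t name_len,
                               const std::vector<uint64_t>& lod_lens,
                               uint64_t dims, uint64_t data_bytes) {
  uint64_t total = 4;                              // Version
  total += 8 + name_len;                           // Bytes of `Name` + Name
  total += 8;                                      // LoD Level
  for (uint64_t n : lod_lens) total += 8 + n * 8;  // size field + entries
  total += 8 + dims * 4;                           // Dims of `Shape` + Shape
  total += 4 + 8 + data_bytes;                     // Dtype + `Data` fields
  return total;
}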
void SerializePDTensorToStream(std::ostream *os, const PaddleTensor &tensor) {
  // 1. Version
  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
            sizeof(kCurPDTensorVersion));
  // 2. Name
  uint64_t name_bytes = tensor.name.size();
  os->write(reinterpret_cast<const char *>(&name_bytes), sizeof(name_bytes));
  os->write(tensor.name.c_str(), name_bytes);
  // 3. LoD
  auto lod = tensor.lod;
  uint64_t lod_size = lod.size();
  os->write(reinterpret_cast<const char *>(&lod_size), sizeof(lod_size));
  for (auto &each : lod) {
    // Per-level byte count, written as uint64_t to match the table above.
    uint64_t size = each.size() * sizeof(size_t);
    os->write(reinterpret_cast<const char *>(&size), sizeof(size));
    os->write(reinterpret_cast<const char *>(each.data()),
              static_cast<std::streamsize>(size));
  }
  // 4. Shape
  uint64_t dims = tensor.shape.size();
  os->write(reinterpret_cast<const char *>(&dims), sizeof(dims));
  os->write(reinterpret_cast<const char *>(tensor.shape.data()),
            sizeof(int) * dims);
  // 5. Data
  os->write(reinterpret_cast<const char *>(&tensor.dtype),
            sizeof(tensor.dtype));
  uint64_t length = tensor.data.length();
  // Write sizeof(length), not sizeof(size_t): the reader expects a uint64_t.
  os->write(reinterpret_cast<const char *>(&length), sizeof(length));
  os->write(reinterpret_cast<const char *>(tensor.data.data()), length);
}

void DeserializePDTensorToStream(std::istream &is, PaddleTensor *tensor) {
  // 1. Version
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  // 2. Name
  uint64_t name_bytes;
  is.read(reinterpret_cast<char *>(&name_bytes), sizeof(name_bytes));
  std::vector<char> bytes(name_bytes);
  is.read(bytes.data(), name_bytes);
  tensor->name = std::string(bytes.data(), name_bytes);
  // 3. LoD
  uint64_t lod_level;
  is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
  auto *lod = &(tensor->lod);
  lod->resize(lod_level);
  for (uint64_t i = 0; i < lod_level; ++i) {
    uint64_t size;
    is.read(reinterpret_cast<char *>(&size), sizeof(size));
    std::vector<size_t> tmp(size / sizeof(size_t));
    is.read(reinterpret_cast<char *>(tmp.data()),
            static_cast<std::streamsize>(size));
    (*lod)[i] = tmp;
  }
  // 4. Shape
  uint64_t dims;
  is.read(reinterpret_cast<char *>(&dims), sizeof(dims));
  tensor->shape.resize(dims);
  is.read(reinterpret_cast<char *>(tensor->shape.data()), sizeof(int) * dims);
  // 5. Data
  is.read(reinterpret_cast<char *>(&tensor->dtype), sizeof(tensor->dtype));
  uint64_t length;
  is.read(reinterpret_cast<char *>(&length), sizeof(length));
  tensor->data.Resize(length);
  is.read(reinterpret_cast<char *>(tensor->data.data()), length);
}
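
For reference, a minimal in-memory round trip through this pair of functions looks like the sketch below (`in` stands for any populated PaddleTensor, as in the tester further down):

#include <sstream>

paddle::PaddleTensor in;  // assume name/shape/lod/dtype/data are filled in
std::stringstream ss;
paddle::inference::SerializePDTensorToStream(&ss, in);

paddle::PaddleTensor out;
paddle::inference::DeserializePDTensorToStream(ss, &out);
// `out` now holds deep copies of in's name, lod, shape, dtype and data.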

// =========================================================
// Item             | Type     | Bytes
// ---------------------------------------------------------
// Version          | uint32_t | 4
// ---------------------------------------------------------
// Size of Tensors  | uint64_t | 8
// Tensors          | ----     | ---
// ---------------------------------------------------------
void SerializePDTensorsToStream(std::ostream *os,
                                const std::vector<PaddleTensor> &tensors) {
  // 1. Version
  os->write(reinterpret_cast<const char *>(&kCurPDTensorVersion),
            sizeof(kCurPDTensorVersion));
  // 2. Tensors
  uint64_t num = tensors.size();
  os->write(reinterpret_cast<const char *>(&num), sizeof(num));
  for (const auto &tensor : tensors) {
    SerializePDTensorToStream(os, tensor);
  }
}

void DeserializePDTensorsToStream(std::istream &is,
                                  std::vector<PaddleTensor> *tensors) {
  // 1. Version
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  // 2. Tensors
  uint64_t num;
  is.read(reinterpret_cast<char *>(&num), sizeof(num));
  tensors->resize(num);
  for (auto &tensor : *tensors) {
    DeserializePDTensorToStream(is, &tensor);
  }
}
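
Combining the two tables: a serialized batch is a 12-byte header (version plus count) followed by the per-tensor records, so with the hypothetical per-tensor helper from earlier the total stream size is easy to predict:

uint64_t ExpectedPDTensorsBytes(const std::vector<uint64_t>& per_tensor_bytes) {
  uint64_t total = 4 + 8;  // Version + Size of Tensors
  for (uint64_t b : per_tensor_bytes) total += b;
  return total;
}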

void SerializePDTensorsToFile(const std::string &path,
                              const std::vector<PaddleTensor> &tensors) {
  std::ofstream fout(path, std::ios::binary);
  SerializePDTensorsToStream(&fout, tensors);
  fout.close();
}

void DeserializePDTensorsToFile(const std::string &path,
                                std::vector<PaddleTensor> *tensors) {
  bool is_present = analysis::FileExists(path);
  PADDLE_ENFORCE_EQ(is_present, true,
                    platform::errors::InvalidArgument(
                        "Cannot open %s to read", path));
  std::ifstream fin(path, std::ios::binary);
  DeserializePDTensorsToStream(fin, tensors);
  fin.close();
}

}  // namespace inference
}  // namespace paddle
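
Typical use of the file-level pair is to capture real inputs once and replay them later, e.g. in a benchmark. A sketch (the path and BuildFeeds() are illustrative, not part of the patch):

std::vector<paddle::PaddleTensor> inputs = BuildFeeds();  // hypothetical feed builder
paddle::inference::SerializePDTensorsToFile("./inputs.pdtensor", inputs);

std::vector<paddle::PaddleTensor> replay;
paddle::inference::DeserializePDTensorsToFile("./inputs.pdtensor", &replay);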

paddle/fluid/inference/utils/io_utils.h
@@ -0,0 +1,40 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <iosfwd>  // std::ostream / std::istream declarations
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_api.h"

namespace paddle {
namespace inference {

constexpr uint32_t kCurPDTensorVersion = 0;

void SerializePDTensorToStream(std::ostream* os, const PaddleTensor& tensor);
void DeserializePDTensorToStream(std::istream& is, PaddleTensor* tensor);

void SerializePDTensorsToStream(std::ostream* os,
                                const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToStream(std::istream& is,
                                  std::vector<PaddleTensor>* tensors);

void SerializePDTensorsToFile(const std::string& path,
                              const std::vector<PaddleTensor>& tensors);
void DeserializePDTensorsToFile(const std::string& path,
                                std::vector<PaddleTensor>* tensors);
}  // namespace inference
}  // namespace paddle
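
One caveat: both deserializers read the 4-byte version field but never compare it against kCurPDTensorVersion, so a future format change would fail silently. A caller-side guard could look like this sketch (not part of the API; the path is illustrative):

#include <fstream>
#include <iostream>

std::ifstream fin("./inputs.pdtensor", std::ios::binary);
uint32_t version = 0;
fin.read(reinterpret_cast<char*>(&version), sizeof(version));
if (version != paddle::inference::kCurPDTensorVersion) {
  std::cerr << "Unsupported PDTensor format version: " << version << "\n";
} else {
  fin.seekg(0);  // rewind; the deserializer re-reads the version itself
  std::vector<paddle::PaddleTensor> tensors;
  paddle::inference::DeserializePDTensorsToStream(fin, &tensors);
}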

paddle/fluid/inference/utils/io_utils_tester.cc
@@ -0,0 +1,97 @@
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstring>  // std::memcmp
#include <sstream>  // std::stringstream

#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/utils/io_utils.h"

namespace paddle {
namespace inference {
namespace {

bool pd_tensor_equal(const paddle::PaddleTensor& ref,
                     const paddle::PaddleTensor& t) {
  VLOG(3) << "ref.name: " << ref.name << ", t.name: " << t.name;
  VLOG(3) << "ref.dtype: " << ref.dtype << ", t.dtype: " << t.dtype;
  VLOG(3) << "ref.lod_level: " << ref.lod.size()
          << ", t.lod_level: " << t.lod.size();
  VLOG(3) << "ref.data_len: " << ref.data.length()
          << ", t.data_len: " << t.data.length();
  // Compare every serialized field; guard memcmp with a length check.
  return (ref.name == t.name) && (ref.shape == t.shape) &&
         (ref.lod == t.lod) && (ref.dtype == t.dtype) &&
         (ref.data.length() == t.data.length()) &&
         (std::memcmp(ref.data.data(), t.data.data(), ref.data.length()) == 0);
}

template <typename T>
void test_io_utils() {
  std::vector<T> input({6, 8});
  paddle::PaddleTensor in;
  in.name = "Hello";
  in.shape = {1, 2};
  in.lod = std::vector<std::vector<size_t>>{{0, 1}};
  in.data = paddle::PaddleBuf(static_cast<void*>(input.data()),
                              input.size() * sizeof(T));
  in.dtype = paddle::inference::PaddleTensorGetDType<T>();
  std::stringstream ss;
  paddle::inference::SerializePDTensorToStream(&ss, in);
  paddle::PaddleTensor out;
  paddle::inference::DeserializePDTensorToStream(ss, &out);
  ASSERT_TRUE(pd_tensor_equal(in, out));
}
}  // namespace
}  // namespace inference
}  // namespace paddle

TEST(infer_io_utils, float32) { paddle::inference::test_io_utils<float>(); }
TEST(infer_io_utils, int64) { paddle::inference::test_io_utils<int64_t>(); }
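
The template covers any element type that PaddleTensorGetDType is specialized for; assuming an int32 specialization exists in this tree (our assumption, not verified in the patch), a third case is one line:

TEST(infer_io_utils, int32) { paddle::inference::test_io_utils<int32_t>(); }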

TEST(infer_io_utils, tensors) {
  // Create a float32 tensor.
  std::vector<float> input_fp32({1.1f, 3.2f, 5.0f, 8.2f});
  paddle::PaddleTensor in_fp32;
  in_fp32.name = "Tensor.fp32_0";
  in_fp32.shape = {2, 2};
  in_fp32.data = paddle::PaddleBuf(static_cast<void*>(input_fp32.data()),
                                   input_fp32.size() * sizeof(float));
  in_fp32.dtype = paddle::inference::PaddleTensorGetDType<float>();

  // Create an int64 tensor. The element type must be int64_t so the buffer
  // length below (size() * sizeof(int64_t)) matches the actual storage.
  std::vector<int64_t> input_int64({5, 8});
  paddle::PaddleTensor in_int64;
  in_int64.name = "Tensor.int64_0";
  in_int64.shape = {1, 2};
  in_int64.lod = std::vector<std::vector<size_t>>{{0, 1}};
  in_int64.data = paddle::PaddleBuf(static_cast<void*>(input_int64.data()),
                                    input_int64.size() * sizeof(int64_t));
  in_int64.dtype = paddle::inference::PaddleTensorGetDType<int64_t>();

  // Serialize tensors.
  std::vector<paddle::PaddleTensor> tensors_in({in_fp32, in_int64});
  std::string file_path = "./io_utils_tensors";
  paddle::inference::SerializePDTensorsToFile(file_path, tensors_in);

  // Deserialize tensors.
  std::vector<paddle::PaddleTensor> tensors_out;
  paddle::inference::DeserializePDTensorsToFile(file_path, &tensors_out);

  // Check results.
  ASSERT_EQ(tensors_in.size(), tensors_out.size());
  for (size_t i = 0; i < tensors_in.size(); ++i) {
    ASSERT_TRUE(
        paddle::inference::pd_tensor_equal(tensors_in[i], tensors_out[i]));
  }
}