Add variant of new load and save ops for storing model params in a single file (#7909)
* Add save_combine_op
* Add load_combine_op and test
* Add unit-test
* Add a delete to free buffer memory
* Add new variant of load/save
* Fix unit-test
* Add another unit test for compatibility with original save/load
* Address review comments and simplify logic
* Address review comments and simplify code - part 2
* Fix naming issues and CMake problems
* Address review comments
* Fix LoD information in tests
* Address review comments: round 2
parent fbd5f689bd
commit 2e907c3613
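
Usage sketch (not part of the diff): the snippet below shows how the two new operators are meant to be used together — several parameter tensors are serialized into a single file by save_combine and read back, in the same order, by load_combine. It mirrors the unit test added in this PR; the variable names ("w", "b", "w_loaded", "b_loaded") and the file path are hypothetical.

#include "paddle/framework/op_registry.h"

USE_NO_KERNEL_OP(save_combine);
USE_NO_KERNEL_OP(load_combine);

void SaveAndReloadParams() {
  paddle::framework::Scope scope;
  paddle::platform::CPUPlace place;

  // Assume "w" and "b" already hold initialized LoDTensors in `scope`.
  paddle::framework::AttributeMap attrs;
  attrs.insert({"file_path", std::string("/tmp/params.combined")});  // hypothetical path

  // Serialize both parameters into one file.
  auto save = paddle::framework::OpRegistry::CreateOp(
      "save_combine", {{"X", {"w", "b"}}}, {}, attrs);
  save->Run(scope, place);

  // Later, deserialize them back, in the same order they were saved.
  scope.Var("w_loaded")->GetMutable<paddle::framework::LoDTensor>();
  scope.Var("b_loaded")->GetMutable<paddle::framework::LoDTensor>();
  auto load = paddle::framework::OpRegistry::CreateOp(
      "load_combine", {}, {{"Out", {"w_loaded", "b_loaded"}}}, attrs);
  load->Run(scope, place);
}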
@@ -0,0 +1,108 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <fstream>

#include "paddle/framework/op_registry.h"
#include "paddle/platform/device_context.h"

namespace paddle {
namespace operators {

class LoadCombineOp : public framework::OperatorBase {
 public:
  LoadCombineOp(const std::string &type,
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::Place &place) const override {
    auto filename = Attr<std::string>("file_path");

    std::ifstream fin(filename);
    PADDLE_ENFORCE(static_cast<bool>(fin),
                   "Cannot open file %s for load_combine op", filename);

    auto out_var_names = Outputs("Out");
    PADDLE_ENFORCE_GT(
        static_cast<int>(out_var_names.size()), 0,
        "The number of output variables should be greater than 0.");

    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
    auto &dev_ctx = *pool.Get(place);

    for (size_t i = 0; i < out_var_names.size(); i++) {
      auto *out_var = scope.FindVar(out_var_names[i]);

      PADDLE_ENFORCE(out_var != nullptr, "Output variable %s cannot be found",
                     out_var_names[i]);

      auto *tensor = out_var->GetMutable<framework::LoDTensor>();

      // Error checking
      PADDLE_ENFORCE(static_cast<bool>(fin), "Cannot read more from file %s",
                     filename);

      // Get data from fin to tensor
      DeserializeFromStream(fin, tensor, dev_ctx);

      if (platform::is_gpu_place(place)) {
        // copy CPU to GPU
        framework::LoDTensor cpu_tensor;
        cpu_tensor.ShareDataWith(*tensor);
        cpu_tensor.set_lod(tensor->lod());

        // reset tensor
        out_var->Clear();
        tensor = out_var->GetMutable<framework::LoDTensor>();
        tensor->set_lod(cpu_tensor.lod());
        Copy(cpu_tensor, place, dev_ctx, tensor);
      }
    }
  }
};

class LoadCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  LoadCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddOutput(
        "Out",
        "(vector) The output LoDTensors that will be read from the input file.")
        .AsDuplicable();
    AddAttr<std::string>("file_path",
                         "(string) "
                         "LoDTensors will be loaded from \"file_path\".")
        .AddCustomChecker(
            [](const std::string &path) { return !path.empty(); });
    AddComment(R"DOC(
LoadCombine Operator.

LoadCombine operator loads LoDTensor variables from a file. The file should
contain one or more LoDTensors serialized using the SaveCombine operator. The
LoadCombine operator applies a deserialization strategy to appropriately load
the LodTensors, and this strategy complements the serialization strategy used
in the SaveCombine operator. Hence, the LoadCombine operator is tightly coupled
with the SaveCombine operator, and can only deserialize one or more LoDTensors
that were saved using the SaveCombine operator.

)DOC");
  }
};

}  // namespace operators
}  // namespace paddle
namespace ops = paddle::operators;

REGISTER_OPERATOR(load_combine, ops::LoadCombineOp,
                  ops::LoadCombineOpProtoMaker);
@@ -0,0 +1,141 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <stdint.h>
#include <sys/stat.h>
#include <fstream>
#include <numeric>
#include <sstream>
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/op_registry.h"
#include "paddle/platform/device_context.h"

namespace paddle {
namespace operators {

// TODO(sidgoyal78): These function are needed by other files (save_op), move
// them to paddle::filesystem namespace. (as noted by yuyang18 in save_op).
constexpr char kSEP = '/';
static bool FileExists(const std::string &filepath) {
  struct stat buffer;
  return (stat(filepath.c_str(), &buffer) == 0);
}

static std::string DirName(const std::string &filepath) {
  auto pos = filepath.rfind(kSEP);
  if (pos == std::string::npos) {
    return "";
  }
  return filepath.substr(0, pos);
}

static void MkDir(const char *path) {
  if (mkdir(path, 0755)) {
    PADDLE_ENFORCE_EQ(errno, EEXIST, "%s mkdir failed!", path);
  }
}

static void MkDirRecursively(const char *fullpath) {
  if (*fullpath == '\0') return;  // empty string
  if (FileExists(fullpath)) return;

  MkDirRecursively(DirName(fullpath).c_str());
  MkDir(fullpath);
}

class SaveCombineOp : public framework::OperatorBase {
 public:
  SaveCombineOp(const std::string &type,
                const framework::VariableNameMap &inputs,
                const framework::VariableNameMap &outputs,
                const framework::AttributeMap &attrs)
      : OperatorBase(type, inputs, outputs, attrs) {}
  void Run(const framework::Scope &scope,
           const platform::Place &place) const override {
    auto filename = Attr<std::string>("file_path");
    auto overwrite = Attr<bool>("overwrite");

    bool is_present = FileExists(filename);
    if (is_present && !overwrite) {
      PADDLE_THROW("%s exists!, cannot save_combine to it when overwrite=false",
                   filename, overwrite);
    }

    MkDirRecursively(DirName(filename).c_str());
    std::ofstream fout(filename);
    PADDLE_ENFORCE(static_cast<bool>(fout), "Cannot open %s to write",
                   filename);

    auto inp_var_names = Inputs("X");
    PADDLE_ENFORCE_GT(static_cast<int>(inp_var_names.size()), 0,
                      "The number of input variables should be greater than 0");

    // get device context from pool
    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
    auto &dev_ctx = *pool.Get(place);

    for (size_t i = 0; i < inp_var_names.size(); i++) {
      auto *var = scope.FindVar(inp_var_names[i]);

      PADDLE_ENFORCE(var != nullptr,
                     "Cannot find variable %s for save_combine_op",
                     inp_var_names[i]);
      PADDLE_ENFORCE(var->IsType<framework::LoDTensor>(),
                     "SaveCombineOp only supports LoDTensor, %s has wrong type",
                     inp_var_names[i]);

      auto &tensor = var->Get<framework::LoDTensor>();
      // Serialize tensor
      framework::SerializeToStream(fout, tensor, dev_ctx);
    }
    fout.close();
  }
};

class SaveCombineOpProtoMaker : public framework::OpProtoAndCheckerMaker {
 public:
  SaveCombineOpProtoMaker(OpProto *proto, OpAttrChecker *op_checker)
      : OpProtoAndCheckerMaker(proto, op_checker) {
    AddInput(
        "X",
        "(vector) Input LoDTensors that need to be saved together in a file.")
        .AsDuplicable();
    AddComment(R"DOC(
SaveCombine operator

This operator will serialize and write a list of input LoDTensor variables
to a file on disk.
)DOC");
    AddAttr<bool>("overwrite",
                  "(boolean, default true)"
                  "Overwrite the output file if it exists.")
        .SetDefault(true);
    AddAttr<std::string>(
        "file_path",
        "(string)"
        "The \"file_path\" where the LoDTensor variables will be saved.")
        .AddCustomChecker(
            [](const std::string &path) { return !path.empty(); });
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OPERATOR(save_combine, ops::SaveCombineOp,
                  ops::SaveCombineOpProtoMaker);
@@ -0,0 +1,180 @@
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <iostream>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "paddle/framework/op_registry.h"

USE_NO_KERNEL_OP(save_combine);
USE_NO_KERNEL_OP(load_combine);

int* CreateForSaveCombineOp(int x, int y, const std::vector<int>& lod_info,
                            std::string var_name,
                            paddle::platform::CPUPlace& place,
                            paddle::framework::Scope& scope,
                            paddle::framework::LoD& expect_lod) {
  auto var = scope.Var(var_name);
  auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
  tensor->Resize({x, y});
  expect_lod.resize(1);
  for (size_t i = 0; i < lod_info.size(); i++) {
    expect_lod[0].push_back(lod_info[i]);
  }
  tensor->set_lod(expect_lod);
  int* expect = tensor->mutable_data<int>(place);
  for (int64_t i = 0; i < tensor->numel(); ++i) {
    expect[i] = static_cast<int>(i);
  }
  return expect;
}

paddle::framework::LoDTensor* GeneratePlaceholderBeforeLoad(
    const std::string out_var_name, paddle::framework::Scope& scope) {
  auto load_var = scope.Var(out_var_name);
  auto target = load_var->GetMutable<paddle::framework::LoDTensor>();
  return target;
}

int* GetValuesAfterLoadCombineOp(paddle::framework::LoDTensor* target,
                                 paddle::framework::Scope& scope,
                                 paddle::framework::LoD& actual_lod) {
  int* actual = target->data<int>();
  actual_lod = target->lod();
  return actual;
}

void CheckValues(int* expect, int* actual, paddle::framework::LoD expect_lod,
                 paddle::framework::LoD actual_lod, const int& numel) {
  for (int64_t i = 0; i < numel; ++i) {
    EXPECT_EQ(expect[i], actual[i]);
  }
  EXPECT_EQ(expect_lod.size(), actual_lod.size());
  for (size_t i = 0; i < expect_lod.size(); ++i) {
    for (size_t j = 0; j < expect_lod[i].size(); ++j) {
      EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
    }
  }
}

// Here, we create 4 LoDTensors and use save_combine_op to first save these
// in a single file. Then, we use load_combine_op to load these sequentially
TEST(SaveLoadCombineOp, CPU) {
  paddle::framework::Scope scope;
  paddle::platform::CPUPlace place;

  std::vector<int> lod1 = {0, 1, 2, 3, 10};
  int numel1 = 100;
  paddle::framework::LoD expect_lod1;
  int* expect1 = CreateForSaveCombineOp(10, 10, lod1, "test_var1", place, scope,
                                        expect_lod1);

  std::vector<int> lod2 = {0, 2, 5, 10};
  int numel2 = 200;
  paddle::framework::LoD expect_lod2;
  int* expect2 = CreateForSaveCombineOp(10, 20, lod2, "test_var2", place, scope,
                                        expect_lod2);

  std::vector<int> lod3 = {0, 2, 3, 20};
  int numel3 = 4000;
  paddle::framework::LoD expect_lod3;
  int* expect3 = CreateForSaveCombineOp(20, 200, lod3, "test_var3", place,
                                        scope, expect_lod3);

  std::vector<int> lod4 = {0, 1, 20};
  int numel4 = 1000;
  paddle::framework::LoD expect_lod4;
  int* expect4 = CreateForSaveCombineOp(20, 50, lod4, "test_var4", place, scope,
                                        expect_lod4);

  // Set attributes
  std::string filename = "check_tensor.ls";
  paddle::framework::AttributeMap attrs;
  attrs.insert({"file_path", std::string(filename)});

  // Run the save_combine_op
  auto save_combine_op = paddle::framework::OpRegistry::CreateOp(
      "save_combine",
      {{"X", {"test_var1", "test_var2", "test_var3", "test_var4"}}}, {}, attrs);
  save_combine_op->Run(scope, place);

  // Set up output vars
  auto target1 = GeneratePlaceholderBeforeLoad("out_var1", scope);
  auto target2 = GeneratePlaceholderBeforeLoad("out_var2", scope);
  auto target3 = GeneratePlaceholderBeforeLoad("out_var3", scope);
  auto target4 = GeneratePlaceholderBeforeLoad("out_var4", scope);

  // Run the load_combine_op
  auto load_combine_op = paddle::framework::OpRegistry::CreateOp(
      "load_combine", {},
      {{"Out", {"out_var1", "out_var2", "out_var3", "out_var4"}}}, attrs);
  load_combine_op->Run(scope, place);

  paddle::framework::LoD actual_lod1, actual_lod2, actual_lod3, actual_lod4;
  int* actual1 = GetValuesAfterLoadCombineOp(target1, scope, actual_lod1);
  int* actual2 = GetValuesAfterLoadCombineOp(target2, scope, actual_lod2);
  int* actual3 = GetValuesAfterLoadCombineOp(target3, scope, actual_lod3);
  int* actual4 = GetValuesAfterLoadCombineOp(target4, scope, actual_lod4);

  CheckValues(expect1, actual1, expect_lod1, actual_lod1, numel1);
  CheckValues(expect2, actual2, expect_lod2, actual_lod2, numel2);
  CheckValues(expect3, actual3, expect_lod3, actual_lod3, numel3);
  CheckValues(expect4, actual4, expect_lod4, actual_lod4, numel4);
}

// Test with original SaveLoadTest
TEST(SaveLoadTestWithCombineOp, CPU) {
  paddle::framework::Scope scope;
  paddle::platform::CPUPlace place;

  auto var = scope.Var("test_var");
  auto tensor = var->GetMutable<paddle::framework::LoDTensor>();
  tensor->Resize({3, 10});
  paddle::framework::LoD expect_lod;
  expect_lod.resize(1);
  expect_lod[0].push_back(0);
  expect_lod[0].push_back(1);
  expect_lod[0].push_back(2);
  expect_lod[0].push_back(3);

  tensor->set_lod(expect_lod);
  int* expect = tensor->mutable_data<int>(place);
  for (int64_t i = 0; i < tensor->numel(); ++i) {
    expect[i] = static_cast<int>(i);
  }
  paddle::framework::AttributeMap attrs;
  attrs.insert({"file_path", std::string("check_t.save")});

  auto save_op = paddle::framework::OpRegistry::CreateOp(
      "save_combine", {{"X", {"test_var"}}}, {}, attrs);
  save_op->Run(scope, place);

  auto load_var = scope.Var("out_var");
  auto target = load_var->GetMutable<paddle::framework::LoDTensor>();
  auto load_op = paddle::framework::OpRegistry::CreateOp(
      "load_combine", {}, {{"Out", {"out_var"}}}, attrs);
  load_op->Run(scope, place);
  int* actual = target->data<int>();
  for (int64_t i = 0; i < tensor->numel(); ++i) {
    EXPECT_EQ(expect[i], actual[i]);
  }
  auto& actual_lod = target->lod();
  EXPECT_EQ(expect_lod.size(), actual_lod.size());
  for (size_t i = 0; i < expect_lod.size(); ++i) {
    for (size_t j = 0; j < expect_lod[i].size(); ++j) {
      EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]);
    }
  }
}