Speed up dygraph DataLoader based on shared memory and LoDTensor serialization (#22541)
* add lodtensor share memory & serialization, test=develop * fix windows compile error, test=develop * deal vartype pickle & fix unittest matching error message, test=develop * update timeout variable name, test=develop * refactor memory map implementation, test=develop * clear mmap file descriptor when exit unexpectedly, test=develop * remove the child process fd in advance, test=develop * remove mmap fds after Queue.put in child process, test=develop * add hard unittests for register exit func, test=develop * fix python2 compatibility problem in unittest, test=develop * fix exception unittest error, test=develop * polish code based on review comments, test=develop revert-22710-feature/integrated_ps_api
parent
324f2b3922
commit
7d8d573453
@ -0,0 +1,142 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
|
||||||
|
#include "paddle/fluid/memory/allocation/mmap_allocator.h"
|
||||||
|
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include <random>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace memory {
|
||||||
|
namespace allocation {
|
||||||
|
|
||||||
|
// Unmaps the writer-side view of the shared memory file.
//
// Destructors are noexcept by default, so raising through PADDLE_ENFORCE here
// would terminate the process instead of reporting the error. A failed
// munmap is therefore only logged. This destructor does not call shm_unlink.
MemoryMapWriterAllocation::~MemoryMapWriterAllocation() {
  if (munmap(this->ptr(), this->size()) == -1) {
    LOG(WARNING) << "could not unmap the shared memory file "
                 << this->ipc_name();
  }
}
|
||||||
|
|
||||||
|
// Unmaps the reader-side view, unlinks the shared memory file and drops its
// name from MemoryMapFdSet.
//
// Destructors are noexcept by default, so raising through PADDLE_ENFORCE here
// would terminate the process; it would also skip the unlink/Remove steps
// below. A failed munmap is therefore only logged and cleanup continues.
MemoryMapReaderAllocation::~MemoryMapReaderAllocation() {
  if (munmap(this->ptr(), this->size()) == -1) {
    LOG(WARNING) << "could not unmap the shared memory file "
                 << this->ipc_name();
  }
  // The result of shm_unlink is deliberately ignored: the memory mapped file
  // may already have been removed by MemoryMapFdSet::Clear().
  shm_unlink(this->ipc_name().c_str());
  MemoryMapFdSet::Instance().Remove(this->ipc_name());
  VLOG(3) << "~MemoryMapReaderAllocation: " << this->ipc_name();
}
|
||||||
|
|
||||||
|
// Builds a shared memory object name of the form "/paddle_<pid>_<random>".
//
// The pid comes from getpid() and the suffix from std::random_device. The
// former `#ifdef _WIN32` branch was unreachable because this whole file is
// compiled only under `#ifndef _WIN32`.
std::string GetIPCName() {
  static std::random_device rd;
  std::string handle = "/paddle_";
  handle += std::to_string(getpid());
  handle += "_";
  handle += std::to_string(rd());
  // Return the local by name: `return std::move(handle)` would block NRVO.
  return handle;
}
|
||||||
|
|
||||||
|
std::shared_ptr<MemoryMapWriterAllocation> AllocateMemoryMapWriterAllocation(
|
||||||
|
size_t size) {
|
||||||
|
const std::string &ipc_name = GetIPCName();
|
||||||
|
int flags = O_RDWR | O_CREAT;
|
||||||
|
|
||||||
|
int fd = shm_open(ipc_name.c_str(), flags, 0644);
|
||||||
|
PADDLE_ENFORCE_NE(
|
||||||
|
fd, -1, platform::errors::Unavailable("File descriptor %s open failed",
|
||||||
|
ipc_name.c_str()));
|
||||||
|
PADDLE_ENFORCE_EQ(ftruncate(fd, size), 0,
|
||||||
|
platform::errors::Unavailable(
|
||||||
|
"Fruncate a file to a specified length failed!"));
|
||||||
|
|
||||||
|
void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
PADDLE_ENFORCE_NE(ptr, MAP_FAILED,
|
||||||
|
platform::errors::Unavailable(
|
||||||
|
"Memory map failed when create shared memory."));
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
return std::make_shared<MemoryMapWriterAllocation>(ptr, size, ipc_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<MemoryMapReaderAllocation> RebuildMemoryMapReaderAllocation(
|
||||||
|
const std::string &ipc_name, size_t size) {
|
||||||
|
int fd = shm_open(ipc_name.c_str(), O_RDONLY, 0644);
|
||||||
|
PADDLE_ENFORCE_NE(
|
||||||
|
fd, -1, platform::errors::Unavailable("File descriptor %s open failed",
|
||||||
|
ipc_name.c_str()));
|
||||||
|
|
||||||
|
void *ptr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
|
PADDLE_ENFORCE_NE(ptr, MAP_FAILED,
|
||||||
|
platform::errors::Unavailable(
|
||||||
|
"Memory map failed when rebuild shared memory."));
|
||||||
|
close(fd);
|
||||||
|
return std::make_shared<MemoryMapReaderAllocation>(ptr, size, ipc_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the single MemoryMapFdSet object, held as a function-local static.
MemoryMapFdSet &MemoryMapFdSet::Instance() {  // NOLINT
  static MemoryMapFdSet instance;
  return instance;
}
|
||||||
|
|
||||||
|
void MemoryMapFdSet::Insert(const std::string &ipc_name) {
|
||||||
|
std::lock_guard<std::mutex> guard(mtx_);
|
||||||
|
fd_set_.emplace(ipc_name);
|
||||||
|
VLOG(3) << "PID: " << getpid() << ", MemoryMapFdSet: insert " << ipc_name
|
||||||
|
<< ", set size: " << fd_set_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryMapFdSet::Remove(const std::string &ipc_name) {
|
||||||
|
std::lock_guard<std::mutex> guard(mtx_);
|
||||||
|
fd_set_.erase(ipc_name);
|
||||||
|
VLOG(3) << "PID: " << getpid() << ", MemoryMapFdSet: erase " << ipc_name
|
||||||
|
<< ", set size: " << fd_set_.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void MemoryMapFdSet::Clear() {
|
||||||
|
VLOG(3) << "PID: " << getpid() << ", MemoryMapFdSet: set size - "
|
||||||
|
<< fd_set_.size();
|
||||||
|
std::lock_guard<std::mutex> guard(mtx_);
|
||||||
|
for (auto fd : fd_set_) {
|
||||||
|
int rlt = shm_unlink(fd.c_str());
|
||||||
|
if (rlt == 0) {
|
||||||
|
VLOG(3) << "PID: " << getpid() << ", MemoryMapFdSet: clear " << fd;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fd_set_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs Clear() so names still recorded at destruction time are unlinked.
MemoryMapFdSet::~MemoryMapFdSet() {
  this->Clear();
}
|
||||||
|
|
||||||
|
} // namespace allocation
|
||||||
|
} // namespace memory
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,90 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex> // NOLINT
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#include "paddle/fluid/memory/allocation/allocator.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace memory {
|
||||||
|
namespace allocation {
|
||||||
|
|
||||||
|
class MemoryMapWriterAllocation : public Allocation {
|
||||||
|
public:
|
||||||
|
explicit MemoryMapWriterAllocation(void *ptr, size_t size,
|
||||||
|
std::string ipc_name)
|
||||||
|
: Allocation(ptr, size, platform::CPUPlace()),
|
||||||
|
ipc_name_(std::move(ipc_name)) {}
|
||||||
|
|
||||||
|
inline const std::string &ipc_name() const { return ipc_name_; }
|
||||||
|
|
||||||
|
~MemoryMapWriterAllocation() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string ipc_name_;
|
||||||
|
};
|
||||||
|
|
||||||
|
class MemoryMapReaderAllocation : public Allocation {
|
||||||
|
public:
|
||||||
|
explicit MemoryMapReaderAllocation(void *ptr, size_t size,
|
||||||
|
std::string ipc_name)
|
||||||
|
: Allocation(ptr, size, platform::CPUPlace()),
|
||||||
|
ipc_name_(std::move(ipc_name)) {}
|
||||||
|
|
||||||
|
inline const std::string &ipc_name() const { return ipc_name_; }
|
||||||
|
|
||||||
|
~MemoryMapReaderAllocation() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string ipc_name_;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::shared_ptr<MemoryMapWriterAllocation> AllocateMemoryMapWriterAllocation(
|
||||||
|
size_t size);
|
||||||
|
|
||||||
|
std::shared_ptr<MemoryMapReaderAllocation> RebuildMemoryMapReaderAllocation(
|
||||||
|
const std::string &ipc_name, size_t size);
|
||||||
|
|
||||||
|
// Set of shared memory file names, guarded by a mutex and reached through
// Instance(). The constructor is private, so Instance() is the only way to
// obtain an object of this type.
class MemoryMapFdSet {
 public:
  ~MemoryMapFdSet();

  // Accessor for the shared object.
  static MemoryMapFdSet &Instance();  // NOLINT

  // Adds a name to the set.
  void Insert(const std::string &ipc_name);

  // Removes a name from the set.
  void Remove(const std::string &ipc_name);

  // Unlinks all recorded names and empties the set.
  void Clear();

 private:
  MemoryMapFdSet() = default;

  std::unordered_set<std::string> fd_set_;
  std::mutex mtx_;
};
|
||||||
|
|
||||||
|
} // namespace allocation
|
||||||
|
} // namespace memory
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
#endif
|
@ -0,0 +1,54 @@
|
|||||||
|
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
|
||||||
|
#include "paddle/fluid/memory/allocation/mmap_allocator.h"

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdint>
#include <string>

#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
namespace memory {
|
||||||
|
namespace allocation {
|
||||||
|
|
||||||
|
// Writes a known pattern through a writer allocation, then checks that a
// forked child sees the same data through a reader allocation rebuilt from the
// ipc name.
//
// The child reports its result through its exit status and leaves with
// _exit(), so it never returns into the gtest runner. The parent waits for it
// and asserts on that status, so a mismatch in the child fails the test.
TEST(MemoryMapAllocation, test_allocation_base) {
  constexpr int32_t kElementNum = 1024;
  const size_t data_size = kElementNum * sizeof(int32_t);
  // 1. allocate writer holder
  auto mmap_writer_holder = AllocateMemoryMapWriterAllocation(data_size);
  const std::string ipc_name = mmap_writer_holder->ipc_name();
  // 2. write data
  auto* writer_ptr = static_cast<int32_t*>(mmap_writer_holder->ptr());
  for (int32_t i = 0; i < kElementNum; ++i) {
    writer_ptr[i] = i;
  }
  // 3. create child process
  const pid_t fpid = fork();
  ASSERT_NE(fpid, -1);
  if (fpid == 0) {
    // 4. rebuild reader holder and compare the data
    bool matched = true;
    try {
      // Scoped so the reader allocation is destroyed before _exit().
      auto mmap_reader_holder =
          RebuildMemoryMapReaderAllocation(ipc_name, data_size);
      const auto* reader_ptr =
          static_cast<const int32_t*>(mmap_reader_holder->ptr());
      for (int32_t i = 0; i < kElementNum; ++i) {
        if (reader_ptr[i] != i) {
          matched = false;
          break;
        }
      }
    } catch (...) {
      // Any failure while rebuilding counts as a mismatch; the child must not
      // propagate into the test runner.
      matched = false;
    }
    _exit(matched ? 0 : 1);
  }
  // 5. wait for the child and check its verdict
  int status = 0;
  ASSERT_EQ(waitpid(fpid, &status, 0), fpid);
  ASSERT_TRUE(WIFEXITED(status));
  ASSERT_EQ(WEXITSTATUS(status), 0);
}
|
||||||
|
|
||||||
|
} // namespace allocation
|
||||||
|
} // namespace memory
|
||||||
|
} // namespace paddle
|
||||||
|
|
||||||
|
#endif
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,84 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import signal
|
||||||
|
import unittest
|
||||||
|
import multiprocessing
|
||||||
|
import time
|
||||||
|
|
||||||
|
import paddle.compat as cpt
|
||||||
|
|
||||||
|
if sys.version_info[0] == 2:
|
||||||
|
import Queue as queue
|
||||||
|
else:
|
||||||
|
import queue
|
||||||
|
|
||||||
|
from paddle.fluid.reader import multiprocess_queue_set, _cleanup, CleanupFuncRegistrar
|
||||||
|
|
||||||
|
# NOTE: These special functions cannot be detected by the existing coverage mechanism,
|
||||||
|
# so the following unittests are added for these internal functions.
|
||||||
|
|
||||||
|
|
||||||
|
class TestDygraphDataLoaderCleanUpFunc(unittest.TestCase):
    """Calls _cleanup() after a filled queue was added to multiprocess_queue_set."""

    def setUp(self):
        self.capacity = 10

    def test_clear_queue_set(self):
        # Register the queue first, then fill it to capacity before cleanup.
        pending = queue.Queue(self.capacity)
        multiprocess_queue_set.add(pending)
        for item in range(self.capacity):
            pending.put(item)
        _cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
class TestRegisterExitFunc(unittest.TestCase):
    """Covers CleanupFuncRegistrar.register with bad input and with signals."""

    # Placeholder callback; it intentionally does nothing.
    def none_func(self):
        pass

    def test_not_callable_func(self):
        # Registering a non-callable must raise TypeError with this message.
        with self.assertRaises(TypeError) as raised:
            CleanupFuncRegistrar.register(5)
        self.assertIn("is not callable",
                      cpt.get_exception_message(raised.exception))

    def test_old_handler_for_sigint(self):
        CleanupFuncRegistrar.register(
            function=self.none_func, signals=[signal.SIGINT])

    def test_signal_wrapper_by_sigchld(self):
        # Child process target; it intentionally does nothing.
        def noop_target():
            pass

        CleanupFuncRegistrar.register(
            function=self.none_func, signals=[signal.SIGCHLD])

        # The test expects SystemExit to surface in this process while the
        # child is started and the parent sleeps.
        with self.assertRaises(SystemExit):
            child = multiprocessing.Process(target=noop_target)
            child.start()
            time.sleep(3)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
@ -0,0 +1,78 @@
|
|||||||
|
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
import numpy as np
|
||||||
|
import paddle.fluid as fluid
|
||||||
|
from paddle.fluid import core
|
||||||
|
|
||||||
|
|
||||||
|
def get_random_images_and_labels(image_shape, label_shape):
    """Return a random float32 array and an int64 array of the given shapes.

    Both arrays are drawn with np.random.random, image first. Because the
    samples lie in [0, 1), the int64 cast makes every label 0.
    """
    return (np.random.random(size=image_shape).astype('float32'),
            np.random.random(size=label_shape).astype('int64'))
|
||||||
|
|
||||||
|
|
||||||
|
def batch_generator_creator(batch_size, batch_num):
    """Return a generator function yielding batch_num (image, label) batches.

    Each batch comes from get_random_images_and_labels with shapes
    [batch_size, 784] and [batch_size, 1].
    """

    def __reader__():
        image_shape = [batch_size, 784]
        label_shape = [batch_size, 1]
        for _ in range(batch_num):
            yield get_random_images_and_labels(image_shape, label_shape)

    return __reader__
|
||||||
|
|
||||||
|
|
||||||
|
class TestDygraphDataLoaderMmapFdsClear(unittest.TestCase):
    """Runs a multiprocess DataLoader and breaks out of each epoch early."""

    def setUp(self):
        self.batch_size = 8
        self.batch_num = 100
        self.epoch_num = 2
        self.capacity = 50

    def prepare_data_loader(self):
        """Build a multiprocess DataLoader fed by the random batch generator."""
        data_loader = fluid.io.DataLoader.from_generator(
            capacity=self.capacity, use_multiprocess=True)
        batch_generator = batch_generator_creator(self.batch_size,
                                                  self.batch_num)
        data_loader.set_batch_generator(
            batch_generator, places=fluid.CPUPlace())
        return data_loader

    def run_one_epoch_with_break(self, loader):
        """Check shapes batch by batch and stop after step 30."""
        image_shape = [self.batch_size, 784]
        label_shape = [self.batch_size, 1]
        for step_id, data in enumerate(loader()):
            image, label = data
            relu = fluid.layers.relu(image)
            self.assertEqual(image.shape, image_shape)
            self.assertEqual(label.shape, label_shape)
            self.assertEqual(relu.shape, image_shape)
            if step_id == 30:
                break

    def test_data_loader_break(self):
        with fluid.dygraph.guard():
            loader = self.prepare_data_loader()
            # Only the first epoch runs; the loop is left right after it.
            for _ in range(self.epoch_num):
                self.run_one_epoch_with_break(loader)
                break

    def test_data_loader_continue_break(self):
        with fluid.dygraph.guard():
            loader = self.prepare_data_loader()
            # Every epoch is started and interrupted in turn.
            for _ in range(self.epoch_num):
                self.run_one_epoch_with_break(loader)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
Loading…
Reference in new issue