From a53e8a8da6a96e559c0ca38367024f2c5b04c021 Mon Sep 17 00:00:00 2001
From: Brian Liu
Date: Sat, 9 Jun 2018 09:23:14 +0800
Subject: [PATCH 1/3] Update MKLDNN integration framework to support Paddle
 multi-instances

Make all blob info saved in the global device context thread-based.
Meanwhile, save the thread id in thread-local storage in ParallelDo.

---
 paddle/fluid/platform/device_context.cc | 65 +++++++++++++++++++------
 paddle/fluid/platform/device_context.h  | 10 +++-
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 7d1cf57253..690ba55279 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -25,6 +25,14 @@ namespace platform {
 
 DeviceContextPool* DeviceContextPool::pool = nullptr;
 
+namespace {
+// Current thread's id.
+thread_local int cur_thread_id = 0;
+}
+
+void set_cur_thread_id(int tid) { cur_thread_id = tid; }
+int get_cur_thread_id(void) { return cur_thread_id; }
+
 platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
   auto it = device_contexts_.find(place);
   if (it == device_contexts_.end()) {
@@ -296,38 +304,65 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
 #ifdef PADDLE_WITH_MKLDNN
 
 MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
-    : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobs_() {
-  p_blobs_.reset(new std::unordered_map<std::string, std::shared_ptr<void>>());
+    : CPUDeviceContext(place), engine_(mkldnn::engine::cpu, 0), p_blobmap_() {
+  p_blobmap_.reset(new BlobMap());
+  p_mutex_.reset(new std::mutex());
 }
 
 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
-  std::unordered_map<std::string, std::shared_ptr<void>>* p;
-  p = p_blobs_.get();
+  BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<KeyBlob> pBlob = nullptr;
+
+  int tid = platform::get_cur_thread_id();
 
-  auto it = p->find(name);
+  std::lock_guard<std::mutex> lock(*p_mutex_.get());
 
-  if (it == p->end()) {
-    (*p)[name] = data;  // create new blob
+  // Find KeyBlob for current thread
+  auto map_it = pMap->find(tid);
+
+  if (map_it == pMap->end()) {
+    // 1st time to set blob in current thread
+    pBlob = std::shared_ptr<KeyBlob>(new KeyBlob());
+    (*pMap)[tid] = pBlob;
   } else {
-    it->second = data;  // set data to existing blob
+    pBlob = map_it->second;
   }
 
+  // Find Key in found (or newly created) KeyBlob
+  auto key_it = pBlob->find(name);
+
+  if (key_it == pBlob->end()) {
+    (*pBlob)[name] = data;  // create new blob
+  } else {
+    key_it->second = data;  // set data to existing blob
+  }
+
+  // lock will be automatically released when out of scope
   return;
 }
 
 std::shared_ptr<void> MKLDNNDeviceContext::GetBlob(
     const std::string& name) const {
-  std::unordered_map<std::string, std::shared_ptr<void>>* p;
-  p = p_blobs_.get();
+  BlobMap* pMap = p_blobmap_.get();
+  std::shared_ptr<KeyBlob> pBlob = nullptr;
 
-  auto it = p->find(name);
+  int tid = platform::get_cur_thread_id();
 
-  if (it != p->end()) {
-    return it->second;
-  }
+  std::lock_guard<std::mutex> lock(*p_mutex_.get());
+
+  // Find KeyBlob for current thread firstly
+  auto map_it = pMap->find(tid);
+  if (map_it == pMap->end()) return nullptr;
+  pBlob = map_it->second;
+
+  // Find Blob via name
+  auto key_it = pBlob->find(name);
+
+  if (key_it == pBlob->end()) return nullptr;
 
-  return nullptr;
+  // lock will be automatically released when out of scope
+  return key_it->second;
 }
 
 #endif
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 999bbe00f1..1527c9f324 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -39,6 +39,12 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;
+
+void set_cur_thread_id(int);
+int get_cur_thread_id(void);
+
 class DeviceContext {
  public:
   virtual ~DeviceContext() {}
@@ -191,8 +197,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
 
  private:
   mkldnn::engine engine_;
-  std::shared_ptr<std::unordered_map<std::string, std::shared_ptr<void>>>
-      p_blobs_;
+  std::shared_ptr<BlobMap> p_blobmap_;
+  std::shared_ptr<std::mutex> p_mutex_;
 };
 
 #endif
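For illustration, the change above keys every cached MKLDNN blob first by a thread id held in thread-local storage and only then by name, so predictor instances running in parallel cannot overwrite each other's entries. Below is a minimal, self-contained sketch of that pattern; ThreadBlobCache and the demo in main() are hypothetical names that only mirror the BlobMap/KeyBlob layout of the patch and are not part of the Paddle API.

// Illustrative sketch only: a cache keyed by thread id, then by blob name,
// mirroring the BlobMap (per thread) / KeyBlob (per name) layout of the patch.
#include <iostream>
#include <memory>
#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>

namespace {
// Mimics platform::set_cur_thread_id / get_cur_thread_id.
thread_local int cur_thread_id = 0;
}  // namespace

class ThreadBlobCache {
 public:
  void SetBlob(const std::string& name, std::shared_ptr<void> data) {
    std::lock_guard<std::mutex> lock(mutex_);
    blobs_[cur_thread_id][name] = data;  // creates this thread's map on demand
  }

  std::shared_ptr<void> GetBlob(const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);
    auto tit = blobs_.find(cur_thread_id);
    if (tit == blobs_.end()) return nullptr;  // this thread never set anything
    auto bit = tit->second.find(name);
    return bit == tit->second.end() ? nullptr : bit->second;
  }

 private:
  std::mutex mutex_;
  // thread id -> (blob name -> blob), i.e. a BlobMap of KeyBlobs.
  std::unordered_map<int,
                     std::unordered_map<std::string, std::shared_ptr<void>>>
      blobs_;
};

int main() {
  ThreadBlobCache cache;
  std::vector<std::thread> workers;
  for (int tid = 0; tid < 2; ++tid) {
    workers.emplace_back([&cache, tid] {
      cur_thread_id = tid + 1;  // same convention the test helper uses below
      cache.SetBlob("conv_pd", std::make_shared<int>(tid));
      // Each thread only ever sees the blob it stored itself.
      auto blob = std::static_pointer_cast<int>(cache.GetBlob("conv_pd"));
      std::cout << "thread " << cur_thread_id << " -> " << *blob << "\n";
    });
  }
  for (auto& w : workers) w.join();
  return 0;
}

Because cur_thread_id defaults to 0, single-threaded code that never calls set_cur_thread_id keeps working unchanged.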
From 741cb33bd97dcb121d866acf18458f95527f3a11 Mon Sep 17 00:00:00 2001
From: Sylwester Fraczek
Date: Tue, 16 Oct 2018 14:52:45 +0200
Subject: [PATCH 2/3] test multithreading

---
 paddle/fluid/inference/api/helper.h              | 3 ++-
 paddle/fluid/inference/tests/api/tester_helper.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 24f59cf43a..e46dc13269 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -160,7 +160,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
                       double latency, int epoch = 1) {
   LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
             << ", threads: " << num_threads << ", thread id: " << tid
-            << ", latency: " << latency << "ms ======";
+            << ", latency: " << latency << "ms, fps: " << 1 / (latency / 1000.f)
+            << " ======";
   if (epoch > 1) {
     int samples = batch_size * epoch;
     LOG(INFO) << "====== sample number: " << samples
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 5589b58b06..42072895fc 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -139,6 +139,7 @@ void TestMultiThreadPrediction(
   }
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
+      platform::set_cur_thread_id(static_cast<int>(tid) + 1);
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<std::vector<PaddleTensor>> inputs_tid = inputs;
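For reference, the fps value added to PrintTime is simply the reciprocal of the per-iteration latency converted from milliseconds to seconds: a latency of 20 ms, for example, is reported as fps: 50, since 1 / (20 / 1000) = 50.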
From 2098b42584f0d6c588d2ec62f6b37a4dc8916e68 Mon Sep 17 00:00:00 2001
From: Sylwester Fraczek
Date: Wed, 24 Oct 2018 10:26:07 +0200
Subject: [PATCH 3/3] review fixes (Teamcity fails)

test=develop

---
 paddle/fluid/inference/tests/api/tester_helper.h |  2 ++
 paddle/fluid/platform/device_context.cc          | 16 ++++++++--------
 paddle/fluid/platform/device_context.h           | 12 ++++++------
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 42072895fc..19c3f532d5 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -139,7 +139,9 @@ void TestMultiThreadPrediction(
   }
   for (int tid = 0; tid < num_threads; ++tid) {
     threads.emplace_back([&, tid]() {
+#ifdef PADDLE_WITH_MKLDNN
       platform::set_cur_thread_id(static_cast<int>(tid) + 1);
+#endif
       // Each thread should have local inputs and outputs.
       // The inputs of each thread are all the same.
       std::vector<std::vector<PaddleTensor>> inputs_tid = inputs;
diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc
index 690ba55279..b0de636de4 100644
--- a/paddle/fluid/platform/device_context.cc
+++ b/paddle/fluid/platform/device_context.cc
@@ -25,14 +25,6 @@ namespace platform {
 
 DeviceContextPool* DeviceContextPool::pool = nullptr;
 
-namespace {
-// Current thread's id.
-thread_local int cur_thread_id = 0;
-}
-
-void set_cur_thread_id(int tid) { cur_thread_id = tid; }
-int get_cur_thread_id(void) { return cur_thread_id; }
-
 platform::DeviceContext* DeviceContextPool::Get(const platform::Place& place) {
   auto it = device_contexts_.find(place);
   if (it == device_contexts_.end()) {
@@ -309,6 +301,14 @@ MKLDNNDeviceContext::MKLDNNDeviceContext(CPUPlace place)
   p_mutex_.reset(new std::mutex());
 }
 
+namespace {
+// Current thread's id.
+thread_local int cur_thread_id = 0;
+}
+
+void set_cur_thread_id(int tid) { cur_thread_id = tid; }
+int get_cur_thread_id(void) { return cur_thread_id; }
+
 void MKLDNNDeviceContext::SetBlob(const std::string& name,
                                   std::shared_ptr<void> data) const {
   BlobMap* pMap = p_blobmap_.get();
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 1527c9f324..942e13a724 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -39,12 +39,6 @@ limitations under the License. */
 namespace paddle {
 namespace platform {
 
-using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
-using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;
-
-void set_cur_thread_id(int);
-int get_cur_thread_id(void);
-
 class DeviceContext {
  public:
   virtual ~DeviceContext() {}
@@ -182,6 +176,12 @@ struct DefaultDeviceContextType {
 #endif
 
 #ifdef PADDLE_WITH_MKLDNN
+using KeyBlob = std::unordered_map<std::string, std::shared_ptr<void>>;
+using BlobMap = std::unordered_map<int, std::shared_ptr<KeyBlob>>;
+
+void set_cur_thread_id(int);
+int get_cur_thread_id(void);
+
 class MKLDNNDeviceContext : public CPUDeviceContext {
  public:
   explicit MKLDNNDeviceContext(CPUPlace place);
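A note on the guard added in this last patch: after the declarations move inside the MKLDNN-only block, set_cur_thread_id and get_cur_thread_id exist only when PADDLE_WITH_MKLDNN is defined, so the call in tester_helper.h has to be wrapped in the same #ifdef or a build without MKLDNN would fail to compile.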