Add internal and external reorder events to the profiler (#29443)

* added external reorder to profiler

* added external and internal reorders to profiler

* added internal and external reorder to profiler

* applied code formatting to the internal/external reorder changes

* removed unnecessary comment
revert-31562-mean
jakpiase 5 years ago committed by GitHub
parent 2480bdef6c
commit 57a4f16d9e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -13,8 +13,8 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/data_layout_transform.h" #include "paddle/fluid/framework/data_layout_transform.h"
#include <string> #include <string>
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/math_function.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
@ -194,6 +194,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p); handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
mkldnn::stream astream(cpu_engine); mkldnn::stream astream(cpu_engine);
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p); reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait(); astream.wait();
} else { } else {

@ -808,9 +808,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_src_memory_p = std::static_pointer_cast<mkldnn::memory>( user_src_memory_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_src_key)); dev_ctx.GetBlob(user_src_key));
user_src_memory_p->set_data_handle(to_void_cast<T>(input_data)); user_src_memory_p->set_data_handle(to_void_cast<T>(input_data));
src_memory_reorder_p->execute(astream, *user_src_memory_p, {
*src_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
src_memory_reorder_p->execute(astream, *user_src_memory_p,
*src_memory_p);
astream.wait();
}
} else if (src_memory_p) { } else if (src_memory_p) {
src_memory_p->set_data_handle(to_void_cast<T>(input_data)); src_memory_p->set_data_handle(to_void_cast<T>(input_data));
} }
@ -840,9 +844,13 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
if (residual_reorder_p) { if (residual_reorder_p) {
auto user_residual_data_p = std::static_pointer_cast<mkldnn::memory>( auto user_residual_data_p = std::static_pointer_cast<mkldnn::memory>(
dev_ctx.GetBlob(user_residual_key)); dev_ctx.GetBlob(user_residual_key));
residual_reorder_p->execute(astream, *user_residual_data_p, {
*dst_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
residual_reorder_p->execute(astream, *user_residual_data_p,
*dst_memory_p);
astream.wait();
}
} }
auto bias_memory_p = auto bias_memory_p =
@ -1094,9 +1102,13 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto reorder_p = auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p); handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);
reorder_p->execute(astream, *diff_weights_memory_p, {
*reorder_dst_memory_p); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
}
// So here we have a data in goihw , which can be interpreted as OIHW // So here we have a data in goihw , which can be interpreted as OIHW
// (OIDHW for conv3d) // (OIDHW for conv3d)

@ -281,8 +281,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, *dst_mem); auto reorder = mkldnn::reorder(src_mem, *dst_mem);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait(); {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }
@ -305,9 +310,13 @@ class FCPrimitiveFactory {
auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes); auto reorder = mkldnn::reorder(*src_mem, *dst_mem, attributes);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, {
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}}); platform::RecordEvent record_reorder("int_reorder",
astream.wait(); platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{MKLDNN_ARG_FROM, *src_mem}, {MKLDNN_ARG_TO, *dst_mem}});
astream.wait();
}
return dst_mem; return dst_mem;
} }

@ -110,8 +110,12 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(reorder_pd); auto reorder = mkldnn::reorder(reorder_pd);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }
@ -267,8 +271,13 @@ class MulPrimitiveFactory {
auto reorder = mkldnn::reorder(src_mem, dst_mem); auto reorder = mkldnn::reorder(src_mem, dst_mem);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
reorder.execute(astream, src_mem, dst_mem);
astream.wait(); {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
return dst_mem; return dst_mem;
} }

@ -139,8 +139,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
} }
mkldnn::stream astream(engine); mkldnn::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(DataLayout::kMKLDNN); output->set_layout(DataLayout::kMKLDNN);
output->set_format(GetMKLDNNFormat(*dst_memory)); output->set_format(GetMKLDNNFormat(*dst_memory));

@ -138,8 +138,12 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
} }
dnnl::stream astream(engine); dnnl::stream astream(engine);
reorder_p->execute(astream, *src_memory, *dst_memory); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *src_memory, *dst_memory);
astream.wait();
}
output->set_layout(framework::DataLayout::kMKLDNN); output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_memory)); output->set_format(platform::GetMKLDNNFormat(*dst_memory));

@ -197,8 +197,12 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
output, in_out.format(), ctx.GetPlace()); output, in_out.format(), ctx.GetPlace());
auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem); auto reorder_p = reorder_handler.AcquireReorder(target_mem, dst_mem);
reorder_p->execute(astream, *dst_mem, *target_mem); {
astream.wait(); platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *dst_mem, *target_mem);
astream.wait();
}
} }
output->set_layout(framework::DataLayout::kMKLDNN); output->set_layout(framework::DataLayout::kMKLDNN);
output->set_format(platform::GetMKLDNNFormat(*dst_mem)); output->set_format(platform::GetMKLDNNFormat(*dst_mem));

@ -23,6 +23,7 @@ limitations under the License. */
#include "mkldnn.hpp" #include "mkldnn.hpp"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle { namespace paddle {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = mkldnn::memory::format_tag; using MKLDNNMemoryFormat = mkldnn::memory::format_tag;
@ -188,6 +189,8 @@ inline void Reorder(mkldnn::memory src, mkldnn::memory dst,
const mkldnn::engine& engine) { const mkldnn::engine& engine) {
auto reorder_prim = mkldnn::reorder(src, dst); auto reorder_prim = mkldnn::reorder(src, dst);
mkldnn::stream astream(engine); mkldnn::stream astream(engine);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst); reorder_prim.execute(astream, src, dst);
astream.wait(); astream.wait();
} }

@ -238,6 +238,9 @@ class MKLDNNHandlerT {
} }
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
@ -264,6 +267,8 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
@ -282,6 +287,8 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>( auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p)); dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
@ -427,6 +434,8 @@ class MKLDNNHandler {
std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p); std::make_shared<mkldnn::reorder>(*user_memory_p, *target_memory_p);
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
mkldnn::stream astream(engine_); mkldnn::stream astream(engine_);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
@ -474,6 +483,8 @@ class MKLDNNHandler {
std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(*reorder_pd)); std::shared_ptr<mkldnn::reorder>(new mkldnn::reorder(*reorder_pd));
dev_ctx_.SetBlob(key_reorder_p, reorder_p); dev_ctx_.SetBlob(key_reorder_p, reorder_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();
@ -484,6 +495,8 @@ class MKLDNNHandler {
auto reorder_p = std::static_pointer_cast<mkldnn::reorder>( auto reorder_p = std::static_pointer_cast<mkldnn::reorder>(
dev_ctx_.GetBlob(key_reorder_p)); dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) { if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p}, reorder_p->execute(astream, {{MKLDNN_ARG_FROM, *user_memory_p},
{MKLDNN_ARG_TO, *target_memory_p}}); {MKLDNN_ARG_TO, *target_memory_p}});
astream.wait(); astream.wait();

@ -649,8 +649,14 @@ void PrintProfiler(
} }
std::cout << std::setw(data_width) << event_item.min_time std::cout << std::setw(data_width) << event_item.min_time
<< std::setw(data_width) << event_item.max_time << std::setw(data_width) << event_item.max_time
<< std::setw(data_width) << event_item.ave_time << std::setw(data_width) << event_item.ave_time;
<< std::setw(data_width) << event_item.ratio << std::endl; if (event_item.name.find("ext_reorder") != std::string::npos ||
event_item.name.find("int_reorder") != std::string::npos) {
std::cout << event_item.ratio << '*';
} else {
std::cout << std::setw(data_width) << event_item.ratio;
}
std::cout << std::endl;
PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead, PrintProfiler(child_table, child_map, sorted_func, sorted_by, overhead,
sorted_domain, name_width, data_width, merge_thread, sorted_domain, name_width, data_width, merge_thread,
@ -715,12 +721,32 @@ void AnalyzeEvent(
if (child_index[j] == 0) { if (child_index[j] == 0) {
main_event_items.push_back(event_items[j]); main_event_items.push_back(event_items[j]);
total += event_items[j].total_time; total += event_items[j].total_time;
} else if ((child_index[j] == 1 &&
(event_items[j].name.find("ext_reorder") !=
std::string::npos ||
event_items[j].name.find("int_reorder") !=
std::string::npos)) &&
platform::GetTracerOption() != TracerOption::kAllOpDetail) {
size_t first_slash_pos = event_items[j].name.find('/');
if (first_slash_pos != std::string::npos) {
std::string fname = event_items[j].name.substr(0, first_slash_pos);
child_map->insert(
std::pair<std::string, EventItem>(fname, event_items[j]));
}
} }
} }
// average time // average time
for (auto &item : main_event_items) { for (auto &item : main_event_items) {
item.ave_time = item.total_time / item.calls; item.ave_time = item.total_time / item.calls;
item.ratio = item.total_time / total; item.ratio = item.total_time / total;
if (platform::GetTracerOption() != TracerOption::kAllOpDetail) {
for (auto it = child_map->begin(); it != child_map->end(); ++it) {
if ((*it).first == item.name) {
(*it).second.ratio = (*it).second.total_time / item.total_time;
break; // to find only first item
}
}
}
} }
for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) {
it->second.ratio = it->second.total_time / total; it->second.ratio = it->second.total_time / total;

Loading…
Cancel
Save