|
|
|
@ -166,16 +166,11 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// The profile has a process-wide mutex, results in serious performance
|
|
|
|
|
// issue
|
|
|
|
|
// in concurrency scenerio. Here use an `if` to fix this issue.
|
|
|
|
|
// Please not remove the `if`, ask @Superjomn if there are any concern.
|
|
|
|
|
if (platform::IsProfileEnabled()) {
|
|
|
|
|
platform::RecordEvent record_event(Type());
|
|
|
|
|
RunImpl(scope, place);
|
|
|
|
|
} else {
|
|
|
|
|
{
|
|
|
|
|
platform::RecordEvent record_event(Type() + "_op");
|
|
|
|
|
RunImpl(scope, place);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
VLOG(3) << place << " " << DebugStringEx(&scope);
|
|
|
|
|
} catch (platform::EnforceNotMet& exception) {
|
|
|
|
|
framework::InsertCallStackInfo(Type(), Attrs(), &exception);
|
|
|
|
@ -953,9 +948,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
|
|
|
|
|
|
|
|
|
|
// do data transformScope &transfer_scope;
|
|
|
|
|
std::vector<std::string> transfered_inplace_vars;
|
|
|
|
|
auto* transfer_scope =
|
|
|
|
|
PrepareData(scope, *kernel_type_, &transfered_inplace_vars, runtime_ctx);
|
|
|
|
|
|
|
|
|
|
Scope* transfer_scope = nullptr;
|
|
|
|
|
{
|
|
|
|
|
platform::RecordEvent record_event("prepare_data");
|
|
|
|
|
transfer_scope = PrepareData(scope, *kernel_type_, &transfered_inplace_vars,
|
|
|
|
|
runtime_ctx);
|
|
|
|
|
}
|
|
|
|
|
// exec scope is the scope that kernel actually executed on.
|
|
|
|
|
const Scope& exec_scope =
|
|
|
|
|
(transfer_scope == nullptr ? scope : *transfer_scope);
|
|
|
|
@ -965,6 +963,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!all_kernels_must_compute_runtime_shape_) {
|
|
|
|
|
platform::RecordEvent record_event("infer_shape");
|
|
|
|
|
RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
|
|
|
|
|
this->InferShape(&infer_shape_ctx);
|
|
|
|
|
}
|
|
|
|
@ -975,8 +974,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
|
|
|
|
|
|
|
|
|
|
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
|
|
|
|
|
// not Scope. Imperative mode only pass inputs and get outputs.
|
|
|
|
|
{
|
|
|
|
|
platform::RecordEvent record_event("compute");
|
|
|
|
|
(*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
|
|
|
|
|
kernel_configs));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!transfered_inplace_vars.empty()) {
|
|
|
|
|
// there is inplace variable has been transfered.
|
|
|
|
|