"add flags"

add_depthwiseConv_op_gpu
dzhwinter 8 years ago
parent 42daf4c302
commit f0316bdbbd

@ -543,8 +543,14 @@ void OperatorWithKernel::Run(const Scope& scope,
auto kernel_iter = kernels.find(expected_kernel_key);
kernel_iter->second->Compute(ExecutionContext(
*this, new_scope, *pool.Get(expected_kernel_key.place_)));
auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
kernel_iter->second->Compute(
ExecutionContext(*this, new_scope, *new_dev_ctx));
/*For profiling/benchmark only*/
if (FLAGS_op_sync) {
new_dev_ctx->Wait();
}
}
proto::DataType OperatorWithKernel::IndicateDataType(

@ -22,6 +22,10 @@ DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
"Default use 92% of GPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
// Command-line flag `op_sync` (boolean, default false).
// Intended for profiling/benchmarking: the operator run path checks
// FLAGS_op_sync after launching a kernel and, when it is set, calls Wait()
// on the device context so that the op finishes before Run returns.
// NOTE: adjacent string literals are concatenated verbatim, so the first
// literal must end with a space to keep the help text readable.
DEFINE_bool(op_sync, false,
            "Default cuda is asynchronous device, set to True will "
            "force op run in synchronous mode.");
namespace paddle {
namespace platform {

Loading…
Cancel
Save