Compare commits

3 Commits

Author     SHA1        Message                                                                     Date
furnace    3560e6806c  [ROCM] Add ROCm support for warpctc op (#31817) (#31971)                    4 years ago
ronnywang  4fd5ed434c  [ROCM] added a cudnn switch of conv2d for rocm platform (#31836) (#31932)   4 years ago
Qi Li      9b40cb8744  [ROCM] fix test_matmul_v2_op (#31802) (#31828)                               4 years ago

@@ -14,11 +14,15 @@
INCLUDE(ExternalProject)
+IF(WITH_ROCM)
+add_definitions(-DWARPCTC_WITH_HIP)
+ENDIF()
SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG cd828e5b6c3b953b82af73f7f44cddc393a20efa)
+set(WARPCTC_TAG c690fc5755abbdbdc98ef78d51ec10a6748a8cd1)
SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
CACHE PATH "Warp-ctc Directory" FORCE)
@@ -57,6 +61,7 @@ ExternalProject_Add(
-DCMAKE_CXX_FLAGS_DEBUG=$<FILTER:${CMAKE_CXX_FLAGS_DEBUG},EXCLUDE,/Zc:inline>
-DCMAKE_INSTALL_PREFIX=${WARPCTC_INSTALL_DIR}
-DWITH_GPU=${WITH_GPU}
+-DWITH_ROCM=${WITH_ROCM}
-DWITH_OMP=${USE_OMP}
-DWITH_TORCH=OFF
-DCMAKE_DISABLE_FIND_PACKAGE_Torch=ON
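
For context (not part of the diff): the WITH_ROCM switch that gates the warpctc build above is also visible from Python, through the same core helpers the new tests further down rely on. A minimal sketch, assuming a Paddle 2.x install:

import paddle.fluid.core as core

# Report which device backend this Paddle build targets; the ROCm (HIP) build
# is the one for which the CMake above defines WARPCTC_WITH_HIP / WITH_ROCM.
print("compiled with ROCm:", core.is_compiled_with_rocm())
print("compiled with CUDA:", core.is_compiled_with_cuda())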

@@ -160,7 +160,7 @@ struct DotGradFunction<DeviceContext, T, math::DisableComplex<T>> {
const Tensor* tensor_dout, Tensor* tensor_dx,
Tensor* tensor_dy,
const paddle::framework::ExecutionContext& ctx) {
-#ifdef __NVCC__
+#if defined(__NVCC__) || defined(__HIPCC__)
if (1 == tensor_dout->dims().size()) {
auto dout = framework::EigenVector<T>::Flatten(*tensor_dout);
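
For reference (not part of the diff): the hunk above widens the Eigen-based gradient path of the dot op from NVCC-only to NVCC-or-HIPCC. A small dygraph sketch of what that gradient computes, assuming a Paddle 2.x install:

import paddle

# d(dot(x, y))/dx = y and d(dot(x, y))/dy = x; this is what the DotGradFunction
# patched above computes (the Eigen branch now also compiles under HIP).
x = paddle.to_tensor([1.0, 2.0, 3.0], stop_gradient=False)
y = paddle.to_tensor([4.0, 5.0, 6.0], stop_gradient=False)

out = paddle.dot(x, y)
out.backward()

print(x.grad)  # equals y
print(y.grad)  # equals x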

@@ -159,8 +159,7 @@ class WarpCTCFunctor {
warpctc_version_ = platform::dynload::get_warpctc_version();
if (platform::is_gpu_place(ctx.GetPlace())) {
// HIP not support ctcOptions in third-party warpctc
-#ifdef PADDLE_WITH_CUDA
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
options_.loc = CTC_GPU;
options_.stream = reinterpret_cast<const platform::CUDADeviceContext&>(
ctx.device_context())
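
For reference (not part of the diff): the functor change lets the CTC_GPU branch (device stream included) compile under HIP as well as CUDA. A minimal sketch of reaching warpctc from Python via paddle.nn.functional.ctc_loss; the shapes and values below are made up for illustration:

import paddle
import paddle.nn.functional as F

# log_probs: [max_logit_length, batch_size, num_classes + 1]; blank index is 0.
log_probs = paddle.rand([5, 2, 4], dtype="float32")
labels = paddle.randint(low=1, high=4, shape=[2, 3], dtype="int32")
input_lengths = paddle.to_tensor([5, 5], dtype="int64")
label_lengths = paddle.to_tensor([3, 2], dtype="int64")

# When run on a GPU place, a CUDA or ROCm build dispatches to the
# warp-ctc GPU branch shown above.
loss = F.ctc_loss(log_probs, labels, input_lengths, label_lengths,
                  blank=0, reduction="mean")
print(loss)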

@@ -564,3 +564,15 @@ DEFINE_string(tracer_mkldnn_ops_on, "",
*/
DEFINE_string(tracer_mkldnn_ops_off, "",
"List of OneDNN operation types to be turned off");
+/**
+ * CUDNN related FLAG
+ * Name: conv2d_disable_cudnn
+ * Since Version:
+ * Value Range: bool, default=false
+ * Example:
+ * Note: Disable cudnn in conv2d.
+ */
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+DEFINE_bool(conv2d_disable_cudnn, false, "Disable cudnn in conv2d");
+#endif
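
The new gflag is defined for both CUDA and HIP builds. A minimal sketch of toggling it from Python on such a build (the flag only exists when the #if above is active), using the same set_flags/get_flags API the Python-side changes below rely on:

import paddle.fluid as fluid

# Defaults to False, per the DEFINE_bool above.
print(fluid.get_flags("FLAGS_conv2d_disable_cudnn"))

# Make conv2d fall back to the plain conv kernel instead of cuDNN (MIOpen on ROCm).
fluid.set_flags({"FLAGS_conv2d_disable_cudnn": True})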

@@ -72,6 +72,7 @@ DECLARE_uint64(conv_workspace_size_limit);
DECLARE_bool(cudnn_batchnorm_spatial_persistent);
DECLARE_bool(cudnn_deterministic);
DECLARE_bool(cudnn_exhaustive_search);
+DECLARE_bool(conv2d_disable_cudnn);
// data processing
DECLARE_bool(enable_cublas_tensor_op_math);
// device management
@@ -367,7 +368,8 @@ static void RegisterGlobalVarGetterSetter() {
FLAGS_fraction_of_cuda_pinned_memory_to_use,
FLAGS_fraction_of_gpu_memory_to_use, FLAGS_initial_gpu_memory_in_mb,
FLAGS_reallocate_gpu_memory_in_mb, FLAGS_enable_cublas_tensor_op_math,
-FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce);
+FLAGS_selected_gpus, FLAGS_sync_nccl_allreduce,
+FLAGS_conv2d_disable_cudnn);
#endif
#ifdef PADDLE_WITH_XPU
REGISTER_PUBLIC_GLOBAL_VAR(FLAGS_selected_xpus);

@@ -230,6 +230,7 @@ def __bootstrap__():
'gpu_allocator_retry_time',
'local_exe_sub_scope_limit',
'gpu_memory_limit_mb',
+'conv2d_disable_cudnn',
]
core.init_gflags(["--tryfromenv=" + ",".join(read_env_flags)])
core.init_glog(sys.argv[0])
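
Because the flag is appended to read_env_flags, the bootstrap's --tryfromenv also picks it up from the environment at import time. A minimal sketch (the variable has to be set before paddle is imported):

import os

# Must be set before importing paddle; __bootstrap__() reads it via --tryfromenv.
os.environ["FLAGS_conv2d_disable_cudnn"] = "true"

import paddle
print(paddle.fluid.get_flags("FLAGS_conv2d_disable_cudnn"))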

@@ -1603,6 +1603,10 @@ def conv2d(input,
pre_bias = helper.create_variable_for_type_inference(dtype)
+if (core.is_compiled_with_cuda() and paddle.fluid.get_flags(
+        "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
+    use_cudnn = False
helper.append_op(
type=l_type,
inputs={

@@ -1465,5 +1465,41 @@ class TestConv2DAPI_Error(unittest.TestCase):
        self.assertRaises(ValueError, run_7)
+# --------- test environment variable ------
+@unittest.skipIf(
+    not (core.is_compiled_with_cuda() or core.is_compiled_with_rocm()),
+    "core is not compiled with CUDA or ROCM")
+class TestConv2DEnviron(unittest.TestCase):
+    def run_conv2d_api(self):
+        inputs = fluid.layers.data(
+            shape=[2, 3, 5, 5],
+            append_batch_size=False,
+            name="inputs",
+            dtype="float32")
+        fluid.layers.conv2d(
+            input=inputs,
+            num_filters=4,
+            filter_size=[3, 3],
+            stride=[1, 1],
+            padding=0,
+            dilation=[1, 1],
+            groups=1,
+            data_format="NCHW")
+
+        x_var = paddle.uniform((2, 3, 5, 5), dtype="float32", min=-1., max=1.)
+        conv = paddle.nn.Conv2D(
+            in_channels=3,
+            out_channels=4,
+            kernel_size=(3, 3),
+            data_format="NCHW")
+        y_var = conv(x_var)
+
+    def test_environ(self):
+        fluid.set_flags({'FLAGS_conv2d_disable_cudnn': False})
+        self.run_conv2d_api()
+        fluid.set_flags({'FLAGS_conv2d_disable_cudnn': True})
+        self.run_conv2d_api()

if __name__ == '__main__':
    unittest.main()

@@ -67,7 +67,7 @@ class TestMatMulV2Op(OpTest):
self.trans_y = False
def init_kernel_type(self):
self.dtype = "float64"
self.dtype = "float32" if core.is_compiled_with_rocm() else "float64"
def setUp(self):
self.init_kernel_type()
@@ -91,7 +91,10 @@ class TestMatMulV2Op(OpTest):
self.check_output()
def test_check_grad(self):
-self.check_grad(['X', 'Y'], 'Out')
+if core.is_compiled_with_rocm():
+    self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2)
+else:
+    self.check_grad(['X', 'Y'], 'Out')
class TestMatMuklOp2(TestMatMulV2Op):
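
For reference (not part of the diff): TestMatMulV2Op exercises the matmul_v2 kernel, which backs paddle.matmul in the 2.x API; the change above only relaxes the dtype and the gradient tolerance on ROCm. A tiny usage sketch:

import paddle

x = paddle.rand([2, 3], dtype="float32")
y = paddle.rand([3, 4], dtype="float32")

# transpose_x / transpose_y correspond to the trans_x / trans_y attributes
# configured in the test above.
out = paddle.matmul(x, y)
print(out.shape)  # [2, 4]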

@@ -20,6 +20,7 @@ import numpy as np
from op_test import OpTest
from test_softmax_op import stable_softmax
import paddle.fluid as fluid
+import paddle.fluid.core as core
from paddle.fluid import Program, program_guard
import paddle
import paddle.nn.functional as F
@@ -240,8 +241,18 @@ class TestWarpCTCOp(OpTest):
def test_check_grad(self):
self.outputs['WarpCTCGrad'] = self.gradient
-self.check_grad(
-    ["Logits"], "Loss", max_relative_error=0.007, check_dygraph=False)
+if core.is_compiled_with_rocm():
+    self.check_grad(
+        ["Logits"],
+        "Loss",
+        max_relative_error=0.009,
+        check_dygraph=False)
+else:
+    self.check_grad(
+        ["Logits"],
+        "Loss",
+        max_relative_error=0.007,
+        check_dygraph=False)
class TestWarpCTCOpCase1(TestWarpCTCOp):
@@ -335,8 +346,18 @@ class TestWarpCTCOpWithPadding(OpTest):
def test_check_grad(self):
self.outputs['WarpCTCGrad'] = self.gradient
-self.check_grad(
-    ["Logits"], "Loss", max_relative_error=0.007, check_dygraph=False)
+if core.is_compiled_with_rocm():
+    self.check_grad(
+        ["Logits"],
+        "Loss",
+        max_relative_error=0.009,
+        check_dygraph=False)
+else:
+    self.check_grad(
+        ["Logits"],
+        "Loss",
+        max_relative_error=0.007,
+        check_dygraph=False)
class TestWarpCTCOpWithPaddingCase1(TestWarpCTCOpWithPadding):

@@ -25,6 +25,7 @@ __all__ = [
import numpy as np
from ...fluid import get_flags
from ...fluid import core
from ...device import get_cudnn_version
from ...fluid.dygraph import layers
@@ -644,6 +645,10 @@ class Conv2D(_ConvNd):
bias_attr=bias_attr,
data_format=data_format)
+if (core.is_compiled_with_cuda() and get_flags(
+        "FLAGS_conv2d_disable_cudnn")["FLAGS_conv2d_disable_cudnn"]):
+    self._use_cudnn = False
def forward(self, x):
if self._padding_mode != 'zeros':
x = F.pad(x,
