|
|
@ -35,7 +35,8 @@ class BiasAddGpuKernel : public GpuKernel {
|
|
|
|
cudnn_data_type_(CUDNN_DATA_FLOAT),
|
|
|
|
cudnn_data_type_(CUDNN_DATA_FLOAT),
|
|
|
|
x_desc_(nullptr),
|
|
|
|
x_desc_(nullptr),
|
|
|
|
b_desc_(nullptr),
|
|
|
|
b_desc_(nullptr),
|
|
|
|
op_desc_(nullptr) {}
|
|
|
|
op_desc_(nullptr),
|
|
|
|
|
|
|
|
is_null_input_(false) {}
|
|
|
|
~BiasAddGpuKernel() override { DestroyResource(); }
|
|
|
|
~BiasAddGpuKernel() override { DestroyResource(); }
|
|
|
|
|
|
|
|
|
|
|
|
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
|
|
|
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
|
|
@ -45,6 +46,10 @@ class BiasAddGpuKernel : public GpuKernel {
|
|
|
|
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
|
|
|
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
|
|
|
VARIABLE_NOT_USED(workspace);
|
|
|
|
VARIABLE_NOT_USED(workspace);
|
|
|
|
VARIABLE_NOT_USED(stream_ptr);
|
|
|
|
VARIABLE_NOT_USED(stream_ptr);
|
|
|
|
|
|
|
|
if (is_null_input_) {
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
T *x_addr = GetDeviceAddress<T>(inputs, 0);
|
|
|
|
T *x_addr = GetDeviceAddress<T>(inputs, 0);
|
|
|
|
T *b_addr = GetDeviceAddress<T>(inputs, 1);
|
|
|
|
T *b_addr = GetDeviceAddress<T>(inputs, 1);
|
|
|
|
T *output_addr = GetDeviceAddress<T>(outputs, 0);
|
|
|
|
T *output_addr = GetDeviceAddress<T>(outputs, 0);
|
|
|
@ -65,6 +70,13 @@ class BiasAddGpuKernel : public GpuKernel {
|
|
|
|
cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
|
|
|
|
cudnn_data_type_ = kCudnnDtypeMap[TypeIdLabel(AnfAlgo::GetInputDeviceDataType(kernel_node, 0))];
|
|
|
|
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
|
|
|
auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
|
|
|
auto num_dims = x_shape.size();
|
|
|
|
auto num_dims = x_shape.size();
|
|
|
|
|
|
|
|
is_null_input_ = CHECK_NULL_INPUT(x_shape);
|
|
|
|
|
|
|
|
if (is_null_input_) {
|
|
|
|
|
|
|
|
MS_LOG(WARNING) << "input is null";
|
|
|
|
|
|
|
|
InitSizeLists();
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (num_dims < 2) {
|
|
|
|
if (num_dims < 2) {
|
|
|
|
MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims;
|
|
|
|
MS_LOG(EXCEPTION) << "input dims must be at least 2, but got " << num_dims;
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -126,6 +138,7 @@ class BiasAddGpuKernel : public GpuKernel {
|
|
|
|
cudnnTensorDescriptor_t x_desc_;
|
|
|
|
cudnnTensorDescriptor_t x_desc_;
|
|
|
|
cudnnTensorDescriptor_t b_desc_;
|
|
|
|
cudnnTensorDescriptor_t b_desc_;
|
|
|
|
cudnnOpTensorDescriptor_t op_desc_;
|
|
|
|
cudnnOpTensorDescriptor_t op_desc_;
|
|
|
|
|
|
|
|
bool is_null_input_;
|
|
|
|
std::vector<size_t> input_size_list_;
|
|
|
|
std::vector<size_t> input_size_list_;
|
|
|
|
std::vector<size_t> output_size_list_;
|
|
|
|
std::vector<size_t> output_size_list_;
|
|
|
|
std::vector<size_t> workspace_size_list_;
|
|
|
|
std::vector<size_t> workspace_size_list_;
|
|
|
|