diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc index e58b1d319c..834f489d79 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_cpu_kernel.cc @@ -32,8 +32,6 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); - - int kernel_size = SizeToInt(weight_shape[3]); auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, STRIDE); auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, DILATION); if (stride_ori.size() != 4 || stride_ori[2] != stride_ori[3]) { @@ -57,6 +55,7 @@ void Conv2dCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::vector int_padding_r; const std::string pad_mode = AnfAlgo::GetNodeAttr(kernel_node, PAD_MODE); + std::vector kernel_size({weight_shape[2], weight_shape[3]}); GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { MS_LOG(EXCEPTION) << "get padding failed"; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc index 3fa6a91405..e2aff0c18c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_filter_cpu_kernel.cc @@ -32,8 +32,6 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); - - int 
kernel_size = SizeToInt(weight_shape[3]); auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, STRIDE); auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, DILATION); if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { @@ -53,6 +51,7 @@ void Conv2dGradFilterCPUKernel::InitKernel(const CNodePtr &kernel_node) { const std::string pad_mode = AnfAlgo::GetNodeAttr(kernel_node, PAD_MODE); std::vector int_padding_l; std::vector int_padding_r; + std::vector kernel_size({weight_shape[2], weight_shape[3]}); GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { MS_LOG(EXCEPTION) << "get padding failed"; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc index 1f02d70f86..a1677905be 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/conv2d_grad_input_cpu_kernel.cc @@ -33,7 +33,6 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { dnnl::memory::desc weights_desc = GetDefaultMemDesc(weight_shape); dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); - int kernel_size = SizeToInt(weight_shape[3]); auto stride_ori = AnfAlgo::GetNodeAttr>(kernel_node, STRIDE); auto dilation_ori = AnfAlgo::GetNodeAttr>(kernel_node, DILATION); if (stride_ori.size() != 2 || stride_ori[0] != stride_ori[1]) { @@ -52,6 +51,7 @@ void Conv2dGradInputCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::vector int_padding_l; std::vector int_padding_r; const std::string pad_mode = AnfAlgo::GetNodeAttr(kernel_node, PAD_MODE); + std::vector kernel_size({weight_shape[2], weight_shape[3]}); GetPadding(kernel_node, pad_mode, src_shape, kernel_size, stride, &int_padding_l, &int_padding_r); if (int_padding_l.size() != 2 || 
int_padding_r.size() != 2) { MS_LOG(EXCEPTION) << "conv2d grad get padding failed"; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc index c71abe809d..49510d29e0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.cc @@ -23,7 +23,7 @@ namespace mindspore { namespace kernel { void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pad_mode, - const std::vector &src_shape, int kernel_size, int stride, + const std::vector &src_shape, const std::vector &kernel_size, int stride, std::vector *padding_l, std::vector *padding_r) { MS_EXCEPTION_IF_NULL(kernel_node); if (src_shape.size() < 2) { @@ -32,11 +32,13 @@ void MKLCPUKernel::GetPadding(const CNodePtr &kernel_node, const std::string &pa std::vector weight_height; weight_height.emplace_back(src_shape[src_shape.size() - 2]); weight_height.emplace_back(src_shape[src_shape.size() - 1]); - int rad = kernel_size / 2; - int need_pad = kernel_size - 1; + MS_LOG(INFO) << "pad mode " << pad_mode; if (pad_mode == PAD_MODE_LOWER_SAME || pad_mode == PAD_MODE_UPPER_SAME) { - for (auto wh : weight_height) { + for (size_t i = 0; i < weight_height.size(); ++i) { + auto wh = weight_height[i]; + int rad = kernel_size[i] / 2; + int need_pad = kernel_size[i] - 1; int re = (wh - 1) % stride; int pad = std::max(rad - (re / 2), 0); padding_r->emplace_back(pad); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h index 3987ffc3bf..7f145c7116 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/mkl_cpu_kernel.h @@ -33,7 +33,8 @@ class MKLCPUKernel : public CPUKernel { protected: void GetPadding(const CNodePtr &kernel_node, const 
std::string &pad_mode, const std::vector &src_shape, - int kernel_size, int stride, std::vector *padding_l, std::vector *padding_r); + const std::vector &kernel_size, int stride, std::vector *padding_l, + std::vector *padding_r); void AddArgument(int arg_key, const dnnl::memory::desc &mem_desc, bool alloc = false); void SetArgumentHandle(int arg_key, void *ptr); dnnl::memory::format_tag GetDefaultFormatTag(const dnnl::memory::dims &dims) const; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc index e4bedf23b9..0bb1edde4d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_cpu_kernel.cc @@ -28,17 +28,18 @@ void PoolingCPUKernel::InitKernel(const CNodePtr &kernel_node) { std::vector dst_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0); dnnl::memory::desc src_desc = GetDefaultMemDesc(src_shape); dnnl::memory::desc dst_desc = GetDefaultMemDesc(dst_shape); - std::vector kernel_sizes = AnfAlgo::GetNodeAttr>(kernel_node, KSIZE); + std::vector origin_kernel_sizes = AnfAlgo::GetNodeAttr>(kernel_node, KSIZE); std::vector strides = AnfAlgo::GetNodeAttr>(kernel_node, STRIDES); - if (kernel_sizes.size() != 4 || strides.size() != 4) { - MS_LOG(EXCEPTION) << "invalid kernel size " << kernel_sizes.size() << " or stride size " << strides.size(); + if (origin_kernel_sizes.size() != 4 || strides.size() != 4) { + MS_LOG(EXCEPTION) << "invalid kernel size " << origin_kernel_sizes.size() << " or stride size " << strides.size(); } dnnl::memory::dims strides_dims{strides[2], strides[3]}; - dnnl::memory::dims kernels_dims{kernel_sizes[2], kernel_sizes[3]}; + dnnl::memory::dims kernels_dims{origin_kernel_sizes[2], origin_kernel_sizes[3]}; const std::string pad_mode = AnfAlgo::GetNodeAttr(kernel_node, PADDING); std::vector int_padding_l; std::vector int_padding_r; - 
GetPadding(kernel_node, pad_mode, src_shape, kernel_sizes[3], strides[3], &int_padding_l, &int_padding_r); + std::vector kernel_size({IntToSize(origin_kernel_sizes[2]), IntToSize(origin_kernel_sizes[3])}); + GetPadding(kernel_node, pad_mode, src_shape, kernel_size, strides[3], &int_padding_l, &int_padding_r); if (int_padding_l.size() != 2 || int_padding_r.size() != 2) { MS_LOG(EXCEPTION) << "pooling get padding failed"; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc index 8189df07ff..1466b54452 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.cc @@ -34,7 +34,7 @@ void PoolingGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { } std::vector padding_r; const std::string pad_mode = AnfAlgo::GetNodeAttr(kernel_node, PADDING); - kernel_size_ = kernel_sizes[3]; + kernel_size_ = {IntToSize(kernel_sizes[2]), IntToSize(kernel_sizes[3])}; stride_ = strides[3]; GetPadding(kernel_node, pad_mode, src_shape_, kernel_size_, stride_, &padding_l_, &padding_r); } @@ -77,7 +77,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d size_t diff_index = 0; for (size_t h = 0; h < dst_shape_[2]; ++h) { box[0].first = IntToSize(std::max(h_start, 0)); - box[0].second = IntToSize(std::min(h_start + kernel_size_, src_height)); + box[0].second = IntToSize(std::min(h_start + SizeToInt(kernel_size_[0]), src_height)); for (size_t w = 0; w < src_shape_[3]; ++w) { row_max_pair[w].first = 0; row_max_pair[w].second = 0; @@ -85,7 +85,7 @@ void PoolingGradCPUKernel::ChannelPoolingGrad(const float *input, const float *d int w_start = -padding_l_[1]; for (size_t w = 0; w < dst_shape_[3]; ++w) { box[1].first = IntToSize(std::max(w_start, 0)); - box[1].second = IntToSize(std::min(w_start + kernel_size_, src_width)); + 
box[1].second = IntToSize(std::min(w_start + SizeToInt(kernel_size_[1]), src_width)); RowPoolingGrad(input, output, diff[diff_index], box, &row_max_pair); diff_index += 1; w_start += stride_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h index fa6d077d4b..6c3f6a4ef1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/pooling_grad_cpu_kernel.h @@ -37,7 +37,8 @@ class PoolingGradCPUKernel : public MKLCPUKernel { void RowPoolingGrad(const float *input, float *output, float diff, const std::vector> &box, std::vector> *row_max_pair); void ChannelPoolingGrad(const float *input, const float *diff, float *output); - int stride_{0}, kernel_size_{0}; + int stride_{0}; + std::vector kernel_size_; std::vector padding_l_; std::vector src_shape_; std::vector dst_shape_; diff --git a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc index d2e41a1fbd..8b144b0440 100644 --- a/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/cpu/cpu_kernel_runtime.cc @@ -36,23 +36,6 @@ namespace mindspore { namespace device { namespace cpu { const size_t INIT_NODE_REF = 1; -namespace { -TypeId GetCPUSupportOutputTypeId(const TypeId type_id) { - TypeId support_type_id = type_id; - if (type_id == kNumberTypeUInt32) { - support_type_id = kNumberTypeInt32; - } - if (type_id == kNumberTypeFloat || type_id == kNumberTypeFloat16 || type_id == kNumberTypeFloat32 || - type_id == kNumberTypeFloat64) { - support_type_id = kNumberTypeFloat32; - } - if (support_type_id != kNumberTypeInt32 && support_type_id != kNumberTypeFloat32) { - MS_LOG(EXCEPTION) << "Check output type failed."; - } - return support_type_id; -} -} // namespace - void 
CPUKernelRuntime::AssignKernelAddress(session::KernelGraph *kernel_graph) { AssignValueNodeAddress(kernel_graph); AssignInputNodeAddress(kernel_graph); @@ -157,15 +140,24 @@ tensor::TensorPtr CPUKernelRuntime::CreatTensorForOutput(const CNodePtr &node, s auto shape = AnfAlgo::GetOutputInferShape(node, index); std::vector temp_shape; (void)temp_shape.insert(temp_shape.end(), shape.begin(), shape.end()); - TypeId type_id = AnfAlgo::GetOutputInferDataType(node, index); - type_id = GetCPUSupportOutputTypeId(type_id); - tensor::TensorPtr tensor = std::make_shared(type_id, temp_shape); + TypeId infer_type_id = AnfAlgo::GetOutputInferDataType(node, index); + TypeId device_type_id = AnfAlgo::GetOutputDeviceDataType(node, index); + tensor::TensorPtr tensor = std::make_shared(infer_type_id, temp_shape); MS_EXCEPTION_IF_NULL(tensor); if (bound_addresses->find(address) != bound_addresses->end()) { tensor->set_device_address(address); need_sync_outputs->emplace_back(tensor); } else { - address->ptr_ = tensor->data_c(); + if (infer_type_id != device_type_id) { + size_t type_size = GetTypeByte(TypeIdToType(device_type_id)); + std::vector data_shape = tensor->shape(); + size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies()); + address->ptr_ = resource_manager_.MemMalloc(tensor_size); + tensor->set_device_address(address); + need_sync_outputs->emplace_back(tensor); + } else { + address->ptr_ = tensor->data_c(); + } address->ref_count_ = INIT_NODE_REF; (void)bound_addresses->insert(address); } @@ -226,12 +219,13 @@ void CPUKernelRuntime::BindInputOutput(const session::KernelGraph *kernel_graph, if (tensor_address != nullptr && tensor_address != address) { (void)tensor->data_sync(); } - std::vector data_shape = tensor->shape(); - size_t tensor_size = - std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies()); + if (tensor->data_type() == kNumberTypeFloat32 || 
tensor->data_type() == kNumberTypeInt32) { address->ptr_ = tensor->data_c(); } else { + std::vector data_shape = tensor->shape(); + size_t tensor_size = + std::accumulate(data_shape.begin(), data_shape.end(), sizeof(float), std::multiplies()); address->ptr_ = resource_manager_.MemMalloc(tensor_size); if (!address->SyncHostToDevice(data_shape, LongToSize(tensor->data().nbytes()), tensor->data_type(), tensor->data_c())) { diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc index 9528e61ee9..b9496318dc 100644 --- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc @@ -141,7 +141,11 @@ void SetKernelInfo(const CNodePtr &kernel_node) { if (kernel_attr.GetAllSame()) { ExpandKernelAttr(kernel_node, &kernel_attr); } - if (IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { + bool ignore_check = false; + if (index == kernel_attrs.size() - 1 && input_types.size() == input_not_cnode_indexes.size()) { + ignore_check = true; + } + if (ignore_check || IsInputFormatDtypeMatched(kernel_attr, input_formats, input_types, input_not_cnode_indexes)) { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (kernel_attr.GetOutputSize() != output_num) { MS_LOG(DEBUG) << "Output num is not equal!"; diff --git a/mindspore/core/ir/anf.cc b/mindspore/core/ir/anf.cc index e238012b14..79a1cacff0 100644 --- a/mindspore/core/ir/anf.cc +++ b/mindspore/core/ir/anf.cc @@ -222,7 +222,7 @@ std::string GetCNodeTarget(const AnfNodePtr &node) { } auto target = GetValue(att_target); if (kTargetSet.find(target) == kTargetSet.end()) { - MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target"; + MS_LOG(EXCEPTION) << "Only support string CPU|GPU|Ascend for primitive_target, but get " << target; } return target; }