From 3bd03ddecc394d5c62a5279e1cb2c9da5d3c82ac Mon Sep 17 00:00:00 2001
From: baihuawei
Date: Thu, 11 Jun 2020 11:16:11 +0800
Subject: [PATCH] fix lstm weight initializer issue

---
 .../ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc      |  7 +++++++
 .../ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc |  7 +++++++
 mindspore/nn/layer/lstm.py                          | 13 +++++++------
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc
index e0cd2bc552..c8e1c0403b 100644
--- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_cpu_kernel.cc
@@ -27,6 +27,7 @@ void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   using dim = dnnl::memory::dims;
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
   bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
   input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
   hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
@@ -41,6 +42,12 @@ void LstmCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
     MS_LOG(EXCEPTION) << "error iteration shape!";
   }
+  if (num_layers_ <= 0) {
+    MS_LOG(EXCEPTION) << "layers must be greater than zero!";
+  }
+  if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
+    MS_LOG(EXCEPTION) << "lstm only supports 3-D input!";
+  }
   const int gate_size = 4 * hidden_size_;
   for (int i = 0; i < num_layers_; ++i) {
     weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
diff --git a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc
index cd8b8d5b80..5d138089d6 100644
--- a/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/kernel/cpu/mkldnn/lstm_grad_cpu_kernel.cc
@@ -31,6 +31,7 @@ void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   auto eng = MKLKernelEngine::Get().engine();
   std::vector<size_t> src_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
   std::vector<size_t> src_h_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
+  std::vector<size_t> src_c_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 2);
   bidirectional_ = AnfAlgo::GetNodeAttr<bool>(kernel_node, "bidirectional");
   input_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "input_size");
   hidden_size_ = AnfAlgo::GetNodeAttr<int>(kernel_node, "hidden_size");
@@ -45,6 +46,12 @@ void LSTMGradCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   if (num_directions_ * num_layers_ != SizeToInt(src_h_shape[0])) {
     MS_LOG(EXCEPTION) << "error iteration shape!";
   }
+  if (num_layers_ <= 0) {
+    MS_LOG(EXCEPTION) << "layers must be greater than zero!";
+  }
+  if (src_shape.size() != 3 || src_h_shape.size() != 3 || src_c_shape.size() != 3) {
+    MS_LOG(EXCEPTION) << "lstm only supports 3-D input!";
+  }
   const int gate_size = 4 * hidden_size_;
   for (int i = 0; i < num_layers_; ++i) {
     weight_size_ += gate_size * (i == 0 ? input_size_ : hidden_size_ * num_directions_);
diff --git a/mindspore/nn/layer/lstm.py b/mindspore/nn/layer/lstm.py
index 5390c67f37..e3fba5ff3f 100755
--- a/mindspore/nn/layer/lstm.py
+++ b/mindspore/nn/layer/lstm.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 # ============================================================================
 """lstm"""
+import math
 import numpy as np
-
 import mindspore.nn as nn
 from mindspore import context
 from mindspore._checkparam import Validator as validator
@@ -148,7 +148,9 @@ class LSTM(Cell):
                 if self.has_bias:
                     increment_size += 2 * gate_size
                 weight_size += increment_size * num_directions
-            self.weight = Parameter(initializer(0.0, [weight_size, 1, 1]), name='weight')
+            stdv = 1 / math.sqrt(hidden_size)
+            w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
+            self.weight = Parameter(initializer(Tensor(w_np), [weight_size, 1, 1]), name='weight')
         else:
             input_size_list = []
             input_size_list.append(self.input_size)
@@ -157,14 +159,13 @@ class LSTM(Cell):
             weights = []
             layers = []
             bias_size = 0 if not self.has_bias else num_directions * self.hidden_size * 4
+            stdv = 1 / math.sqrt(hidden_size)
             for i in range(num_layers):
                 weight_size = (input_size_list[i] + self.hidden_size) * num_directions * self.hidden_size * 4
-                w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
                 if has_bias:
-                    bias_np = np.zeros([bias_size, 1, 1]).astype(np.float32)
-                    w_np = np.concatenate([w_np, bias_np], axis=0)
+                    weight_size = weight_size + bias_size
+                w_np = np.random.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)
                 weights.append(Parameter(initializer(Tensor(w_np), w_np.shape), name='weight' + str(i)))
-
                 layers.append(nn.LSTMCell(input_size=input_size_list[i],
                                           hidden_size=self.hidden_size,
                                           has_bias=self.has_bias,
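
For reference, the initializer this patch switches to is the standard LSTM default: every element of the flat weight blob is drawn from U(-k, k) with k = 1/sqrt(hidden_size), replacing the old constant fills (0.0 in the fused path, 0.01 weights and zero biases in the per-layer CPU path). The following is a minimal standalone sketch of that scheme under the sizing rule the kernels use; the helper name init_lstm_weight and the use of numpy's Generator API are illustrative assumptions, not MindSpore code.

    import math
    import numpy as np

    def init_lstm_weight(input_size, hidden_size, num_layers=1,
                         num_directions=1, has_bias=True, seed=None):
        # Hypothetical helper, not MindSpore API. Size the flat blob the way
        # the kernels do: per layer and direction, W_ih (gate_size x in_size)
        # + W_hh (gate_size x hidden_size), plus b_ih and b_hh if enabled.
        gate_size = 4 * hidden_size  # the four gates i, f, g, o stacked
        weight_size = 0
        for layer in range(num_layers):
            # Layer 0 consumes the input; later layers consume the
            # (possibly bidirectional) hidden state of the previous layer.
            in_size = input_size if layer == 0 else hidden_size * num_directions
            increment = gate_size * in_size + gate_size * hidden_size
            if has_bias:
                increment += 2 * gate_size
            weight_size += increment * num_directions
        # The patch's bound: U(-k, k) with k = 1 / sqrt(hidden_size).
        stdv = 1 / math.sqrt(hidden_size)
        rng = np.random.default_rng(seed)
        return rng.uniform(-stdv, stdv, (weight_size, 1, 1)).astype(np.float32)

    w = init_lstm_weight(input_size=10, hidden_size=32, num_layers=2)
    print(w.shape)  # (weight_size, 1, 1), entries in [-1/sqrt(32), 1/sqrt(32))

Folding the bias block into weight_size before a single uniform draw mirrors what the patched per-layer branch does, and the 1/sqrt(hidden_size) bound matches the common LSTM default (e.g. PyTorch's nn.LSTM reset_parameters).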