# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test a bidirectional LSTM sentiment classifier on GPU: the network must
overfit a constant batch within a few epochs."""
import pytest
import numpy as np

from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
import mindspore.context as context
import mindspore.nn as nn

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Module-level batch size; InitialLstmWeight and test_LSTM both rely on it.
batch_size = 64


def InitialLstmWeight(input_size, hidden_size, num_layers, bidirectional, has_bias=False):
    """Build the flattened weight tensor plus initial h/c states for P.LSTM."""
    num_directions = 2 if bidirectional else 1

    # An LSTM cell has 4 gates; each gate holds an input-to-hidden and a
    # hidden-to-hidden weight matrix (and optionally two bias vectors).
    weight_size = 0
    gate_size = 4 * hidden_size
    for layer in range(num_layers):
        for _ in range(num_directions):
            # Layers above the first consume the concatenated outputs of
            # all directions of the layer below.
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            weight_size += gate_size * input_layer_size
            weight_size += gate_size * hidden_size
            if has_bias:
                weight_size += 2 * gate_size

    w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
    w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    h = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='h')

    c = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='c')

    return h, c, w


class SentimentNet(nn.Cell):
    """Embedding -> stacked (bi)LSTM -> concat(first, last step) -> Dense."""

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, labels, batch_size):
        super(SentimentNet, self).__init__()
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.batch_size = batch_size

        # Frozen embedding table initialized from the given weight matrix.
        self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False,
                                      embedding_table=Tensor(weight))
        self.embedding.embedding_table.requires_grad = False
        # P.LSTM expects inputs as (seq_len, batch_size, embed_size).
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens,
                                                   num_layers, bidirectional)
        self.encoder = P.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                              num_layers=num_layers, has_bias=False,
                              bidirectional=self.bidirectional, dropout=0.0)
        self.concat = P.Concat(2)
        # The decoder sees the concatenation of the first and last time steps,
        # each of width num_hiddens * num_directions.
        if self.bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, labels)

        self.slice1 = P.Slice()
        self.slice2 = P.Slice()
        self.reshape = P.Reshape()

        self.num_direction = 2 if bidirectional else 1

    def construct(self, inputs):
        embeddings = self.embedding(inputs)
        embeddings = self.trans(embeddings, self.perm)
        output, hidden = self.encoder(embeddings, self.h, self.c, self.w)

        # Hard-coded slice sizes match the test configuration:
        # 64 = batch_size, 200 = num_hiddens * num_direction, 499 = max_len - 1.
        output0 = self.slice1(output, (0, 0, 0), (1, 64, 200))    # first time step
        output1 = self.slice2(output, (499, 0, 0), (1, 64, 200))  # last time step
        encoding = self.concat((output0, output1))
        encoding = self.reshape(encoding,
                                (self.batch_size, self.num_hiddens * self.num_direction * 2))
        outputs = self.decoder(encoding)
        return outputs


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_LSTM():
    num_epochs = 5
    embed_size = 100
    num_hiddens = 100
    num_layers = 2
    bidirectional = True
    labels = 2
    vocab_size = 252193
    max_len = 500

    weight = np.ones((vocab_size + 1, embed_size)).astype(np.float32)

    net = SentimentNet(vocab_size=(vocab_size + 1), embed_size=embed_size,
                       num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, batch_size=batch_size)

    learning_rate = 0.1
    momentum = 0.9

    # Only optimize trainable parameters; the embedding table stays frozen.
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                         learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()

    # Train on a constant batch: the loss must drop below 0.01 within num_epochs steps.
    train_features = Tensor(np.ones([batch_size, max_len]).astype(np.int32))
    train_labels = Tensor(np.ones([batch_size]).astype(np.int32))
    losses = []
    for _ in range(num_epochs):
        loss = train_network(train_features, train_labels)
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.01
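

# A minimal sketch for running this test directly rather than through pytest
# (assumes a GPU-enabled MindSpore build is installed; the pytest markers
# above describe the intended CI environment):
if __name__ == "__main__":
    test_LSTM()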