# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Test a bidirectional LSTM sentiment classifier on GPU: the network must
overfit a constant batch within a few epochs."""
import pytest
import numpy as np

from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P
from mindspore.nn import TrainOneStepCell, WithLossCell
from mindspore import Tensor
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
import mindspore.context as context
import mindspore.nn as nn

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")

# Module-level batch size; InitialLstmWeight and test_LSTM both rely on it.
batch_size = 64


def InitialLstmWeight(input_size, hidden_size, num_layers, bidirectional, has_bias=False):
    """Build the flattened weight tensor plus initial h/c states for P.LSTM."""
    num_directions = 2 if bidirectional else 1

    # An LSTM cell has 4 gates; each gate holds an input-to-hidden and a
    # hidden-to-hidden weight matrix (and optionally two bias vectors).
    weight_size = 0
    gate_size = 4 * hidden_size
    for layer in range(num_layers):
        for _ in range(num_directions):
            # Layers above the first consume the concatenated outputs of
            # all directions of the layer below.
            input_layer_size = input_size if layer == 0 else hidden_size * num_directions
            weight_size += gate_size * input_layer_size
            weight_size += gate_size * hidden_size
            if has_bias:
                weight_size += 2 * gate_size

    w_np = np.ones([weight_size, 1, 1]).astype(np.float32) * 0.01
    w = Parameter(initializer(Tensor(w_np), w_np.shape), name='w')

    h = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='h')

    c = Parameter(initializer(
        Tensor(np.ones((num_layers * num_directions, batch_size, hidden_size)).astype(np.float32)),
        [num_layers * num_directions, batch_size, hidden_size]), name='c')

    return h, c, w


class SentimentNet(nn.Cell):
    """Embedding -> stacked (bi)LSTM -> concat(first, last step) -> Dense."""

    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 bidirectional, weight, labels, batch_size):
        super(SentimentNet, self).__init__()
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.batch_size = batch_size

        # Frozen embedding table initialized from the given weight matrix.
        self.embedding = nn.Embedding(vocab_size, embed_size, use_one_hot=False,
                                      embedding_table=Tensor(weight))
        self.embedding.embedding_table.requires_grad = False
        # P.LSTM expects inputs as (seq_len, batch_size, embed_size).
        self.trans = P.Transpose()
        self.perm = (1, 0, 2)
        self.h, self.c, self.w = InitialLstmWeight(embed_size, num_hiddens,
                                                   num_layers, bidirectional)
        self.encoder = P.LSTM(input_size=embed_size, hidden_size=self.num_hiddens,
                              num_layers=num_layers, has_bias=False,
                              bidirectional=self.bidirectional, dropout=0.0)
        self.concat = P.Concat(2)
        # The decoder sees the concatenation of the first and last time steps,
        # each of width num_hiddens * num_directions.
        if self.bidirectional:
            self.decoder = nn.Dense(num_hiddens * 4, labels)
        else:
            self.decoder = nn.Dense(num_hiddens * 2, labels)

        self.slice1 = P.Slice()
        self.slice2 = P.Slice()
        self.reshape = P.Reshape()

        self.num_direction = 2 if bidirectional else 1

    def construct(self, inputs):
        embeddings = self.embedding(inputs)
        embeddings = self.trans(embeddings, self.perm)
        output, hidden = self.encoder(embeddings, self.h, self.c, self.w)

        # Hard-coded slice sizes match the test configuration:
        # 64 = batch_size, 200 = num_hiddens * num_direction, 499 = max_len - 1.
        output0 = self.slice1(output, (0, 0, 0), (1, 64, 200))    # first time step
        output1 = self.slice2(output, (499, 0, 0), (1, 64, 200))  # last time step
        encoding = self.concat((output0, output1))
        encoding = self.reshape(encoding,
                                (self.batch_size, self.num_hiddens * self.num_direction * 2))
        outputs = self.decoder(encoding)
        return outputs


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_LSTM():
    num_epochs = 5
    embed_size = 100
    num_hiddens = 100
    num_layers = 2
    bidirectional = True
    labels = 2
    vocab_size = 252193
    max_len = 500

    weight = np.ones((vocab_size + 1, embed_size)).astype(np.float32)

    net = SentimentNet(vocab_size=(vocab_size + 1), embed_size=embed_size,
                       num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, batch_size=batch_size)

    learning_rate = 0.1
    momentum = 0.9

    # Only optimize trainable parameters; the embedding table stays frozen.
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                         learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()

    # Train on a constant batch: the loss must drop below 0.01 within num_epochs steps.
    train_features = Tensor(np.ones([batch_size, max_len]).astype(np.int32))
    train_labels = Tensor(np.ones([batch_size]).astype(np.int32))
    losses = []
    for _ in range(num_epochs):
        loss = train_network(train_features, train_labels)
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.01
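

# A minimal sketch for running this test directly rather than through pytest
# (assumes a GPU-enabled MindSpore build is installed; the pytest markers
# above describe the intended CI environment):
if __name__ == "__main__":
    test_LSTM()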