# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, ParameterTuple
from mindspore import amp
from mindspore.nn import Dense
from mindspore.nn import TrainOneStepCell, WithLossCell, ForwardValueAndGrad
from mindspore.nn.cell import Cell
from mindspore.nn.layer.basic import Flatten
from mindspore.nn.layer.conv import Conv2d
from mindspore.nn.layer.normalization import BatchNorm2d
from mindspore.nn.layer.pooling import MaxPool2d
from mindspore.nn.optim import Momentum
from mindspore.ops import operations as P
from mindspore.ops import functional as F
from mindspore.ops.operations import Add

# All of the tests below run in graph mode on a GPU target.
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


def random_normal_init(shape, mean=0.0, stddev=0.01, seed=None):
    # Deterministic stand-in for a random normal initializer; mean, stddev and
    # seed are accepted but unused so that the tests stay reproducible.
    init_value = np.ones(shape).astype(np.float32) * 0.01
    return Tensor(init_value)


def variance_scaling_raw(shape):
    variance_scaling_value = np.ones(shape).astype(np.float32) * 0.01
    return Tensor(variance_scaling_value)


def weight_variable_0(shape):
    zeros = np.zeros(shape).astype(np.float32)
    return Tensor(zeros)


def weight_variable_1(shape):
    ones = np.ones(shape).astype(np.float32)
    return Tensor(ones)


def conv3x3(in_channels, out_channels, stride=1, padding=1):
    """3x3 convolution"""
    weight_shape = (out_channels, in_channels, 3, 3)
    weight = variance_scaling_raw(weight_shape)
    return Conv2d(in_channels, out_channels,
                  kernel_size=3, stride=stride, weight_init=weight, has_bias=False, pad_mode="same")


def conv1x1(in_channels, out_channels, stride=1, padding=0):
    """1x1 convolution"""
    weight_shape = (out_channels, in_channels, 1, 1)
    weight = variance_scaling_raw(weight_shape)
    return Conv2d(in_channels, out_channels,
                  kernel_size=1, stride=stride, weight_init=weight, has_bias=False, pad_mode="same")


def conv7x7(in_channels, out_channels, stride=1, padding=0):
    """7x7 convolution"""
    weight_shape = (out_channels, in_channels, 7, 7)
    weight = variance_scaling_raw(weight_shape)
    return Conv2d(in_channels, out_channels,
                  kernel_size=7, stride=stride, weight_init=weight, has_bias=False, pad_mode="same")


def bn_with_initialize(out_channels):
    shape = (out_channels,)
    mean = weight_variable_0(shape)
    var = weight_variable_1(shape)
    beta = weight_variable_0(shape)
    gamma = weight_variable_1(shape)
    bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma,
                     beta_init=beta, moving_mean_init=mean, moving_var_init=var)
    return bn


def bn_with_initialize_last(out_channels):
    # Same as bn_with_initialize, except gamma is zero-initialized; this is the
    # usual choice for the final BatchNorm of a residual branch.
    shape = (out_channels,)
    mean = weight_variable_0(shape)
    var = weight_variable_1(shape)
    beta = weight_variable_0(shape)
    gamma = weight_variable_0(shape)
    bn = BatchNorm2d(out_channels, momentum=0.1, eps=0.0001, gamma_init=gamma,
                     beta_init=beta, moving_mean_init=mean, moving_var_init=var)
    return bn


def fc_with_initialize(input_channels, out_channels):
    weight_shape = (out_channels, input_channels)
    bias_shape = (out_channels,)
    weight = random_normal_init(weight_shape)
    bias = weight_variable_0(bias_shape)
    return Dense(input_channels, out_channels, weight, bias)


class ResidualBlock(Cell):
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False):
        super(ResidualBlock, self).__init__()
        out_chls = out_channels // self.expansion
        self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0)
        self.bn1 = bn_with_initialize(out_chls)
        self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=1)
        self.bn2 = bn_with_initialize(out_chls)
        self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
        self.bn3 = bn_with_initialize_last(out_channels)
        self.relu = P.ReLU()
        self.add = Add()

    def construct(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        out = self.add(out, identity)
        out = self.relu(out)
        return out


class ResidualBlockWithDown(Cell):
    expansion = 4

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 down_sample=False):
        super(ResidualBlockWithDown, self).__init__()
        out_chls = out_channels // self.expansion
        self.conv1 = conv1x1(in_channels, out_chls, stride=1, padding=0)
        self.bn1 = bn_with_initialize(out_chls)
        self.conv2 = conv3x3(out_chls, out_chls, stride=stride, padding=1)
        self.bn2 = bn_with_initialize(out_chls)
        self.conv3 = conv1x1(out_chls, out_channels, stride=1, padding=0)
        self.bn3 = bn_with_initialize_last(out_channels)
        self.relu = P.ReLU()
        self.downSample = down_sample
        self.conv_down_sample = conv1x1(
            in_channels, out_channels, stride=stride, padding=0)
        self.bn_down_sample = bn_with_initialize(out_channels)
        self.add = Add()

    def construct(self, x):
        identity = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.bn3(out)
        identity = self.conv_down_sample(identity)
        identity = self.bn_down_sample(identity)
        out = self.add(out, identity)
        out = self.relu(out)
        return out


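# Both residual blocks above follow the ResNet bottleneck pattern (1x1 reduce,
# 3x3, 1x1 expand, with expansion = 4); ResidualBlockWithDown additionally
# projects the identity branch through a strided 1x1 convolution plus BatchNorm
# so that both branches have matching shapes before the element-wise Add.
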
class MakeLayer0(Cell):
    def __init__(self, block, layer_num, in_channels, out_channels, stride):
        super(MakeLayer0, self).__init__()
        self.a = ResidualBlockWithDown(
            in_channels, out_channels, stride=1, down_sample=True)
        self.b = block(out_channels, out_channels, stride=stride)
        self.c = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        return x


class MakeLayer1(Cell):
    def __init__(self, block, layer_num, in_channels, out_channels, stride):
        super(MakeLayer1, self).__init__()
        self.a = ResidualBlockWithDown(
            in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)
        self.d = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        x = self.d(x)
        return x


class MakeLayer2(Cell):
    def __init__(self, block, layer_num, in_channels, out_channels, stride):
        super(MakeLayer2, self).__init__()
        self.a = ResidualBlockWithDown(
            in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)
        self.d = block(out_channels, out_channels, stride=1)
        self.e = block(out_channels, out_channels, stride=1)
        self.f = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        x = self.d(x)
        x = self.e(x)
        x = self.f(x)
        return x


class MakeLayer3(Cell):
    def __init__(self, block, layer_num, in_channels, out_channels, stride):
        super(MakeLayer3, self).__init__()
        self.a = ResidualBlockWithDown(
            in_channels, out_channels, stride=stride, down_sample=True)
        self.b = block(out_channels, out_channels, stride=1)
        self.c = block(out_channels, out_channels, stride=1)

    def construct(self, x):
        x = self.a(x)
        x = self.b(x)
        x = self.c(x)
        return x


class ResNet(Cell):
    def __init__(self, block, layer_num, num_classes=100):
        super(ResNet, self).__init__()
        self.conv1 = conv7x7(3, 64, stride=2, padding=3)
        self.bn1 = bn_with_initialize(64)
        self.relu = P.ReLU()
        self.maxpool = MaxPool2d(kernel_size=3, stride=2, pad_mode="same")
        self.layer1 = MakeLayer0(
            block, layer_num[0], in_channels=64, out_channels=256, stride=1)
        self.layer2 = MakeLayer1(
            block, layer_num[1], in_channels=256, out_channels=512, stride=2)
        self.layer3 = MakeLayer2(
            block, layer_num[2], in_channels=512, out_channels=1024, stride=2)
        self.layer4 = MakeLayer3(
            block, layer_num[3], in_channels=1024, out_channels=2048, stride=2)
        self.pool = nn.AvgPool2d(7, 1)
        self.fc = fc_with_initialize(512 * block.expansion, num_classes)
        self.flatten = Flatten()

    def construct(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.pool(x)
        x = self.flatten(x)
        x = self.fc(x)
        return x


def resnet50(num_classes):
    """ResNet-50: bottleneck blocks in a [3, 4, 6, 3] layer layout."""
    return ResNet(ResidualBlock, [3, 4, 6, 3], num_classes)


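# Illustrative sketch (not part of the original test suite): a minimal
# forward-pass sanity check for the resnet50 factory above. The helper name,
# batch size and dummy input values are assumptions for illustration only.
def _sanity_check_forward(num_classes=10, batch_size=2):
    net = resnet50(num_classes)
    net.set_train(False)  # inference mode: freeze BatchNorm statistics
    data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
    logits = net(data)
    assert logits.shape == (batch_size, num_classes)

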
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor(num_classes=10, epoch=8, batch_size=1):
    net = resnet50(num_classes)
    lr = 0.1
    momentum = 0.9
    optimizer = Momentum(filter(lambda x: x.requires_grad,
                                net.get_parameters()), lr, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss)
    assert losses[-1].asnumpy() < 1


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338):
    net = resnet50(num_classes)
    lr = 0.1
    momentum = 0.9
    optimizer = Momentum(filter(lambda x: x.requires_grad,
                                net.get_parameters()), lr, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss)
    assert losses[-1].asnumpy() < 1


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
    net = resnet50(num_classes)
    lr = 0.1
    momentum = 0.9
    optimizer = Momentum(filter(lambda x: x.requires_grad,
                                net.get_parameters()), lr, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    train_network = amp.build_train_network(
        net, optimizer, criterion, level="O2")
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss)
    # With amp level "O2", each step returns (loss, overflow_flag, loss_scale).
    assert losses[-1][0].asnumpy() < 1
    assert not losses[-1][1].asnumpy()
    assert losses[-1][2].asnumpy() > 1


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_trainTensor_with_new_interface(num_classes=10, epoch=8, batch_size=1):
    net = resnet50(num_classes)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    net_with_criterion.set_train()
    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
    optimizer = Momentum(weights, 0.1, 0.9)
    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights,
                                        get_by_list=True, sens_param=True)
    losses = []
    for _ in range(epoch):
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        # sens is the sensitivity (initial gradient) fed into backpropagation.
        sens = Tensor(np.ones([1]).astype(np.float32))
        loss, grads = train_network(data, label, sens)
        grads = F.identity(grads)
        optimizer(grads)
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.8
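

# To run these tests locally (illustrative invocation; assumes a GPU-enabled
# MindSpore build and that the project's pytest markers are registered):
#   pytest -v test_gpu_resnet.py -m platform_x86_gpu_training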