parent
b0c59710e2
commit
a2eb45b199
@ -0,0 +1 @@
|
||||
This directory stores the annotation files of the training data
|
||||
@ -0,0 +1,66 @@
|
||||
name: pytorch
|
||||
channels:
|
||||
- soumith
|
||||
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free
|
||||
- https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/
|
||||
- defaults
|
||||
dependencies:
|
||||
- cairo=1.14.8=0
|
||||
- certifi=2016.2.28=py27_0
|
||||
- cffi=1.10.0=py27_0
|
||||
- fontconfig=2.12.1=3
|
||||
- freetype=2.5.5=2
|
||||
- glib=2.50.2=1
|
||||
- harfbuzz=0.9.39=2
|
||||
- hdf5=1.8.17=2
|
||||
- jbig=2.1=0
|
||||
- jpeg=8d=2
|
||||
- libffi=3.2.1=1
|
||||
- libgcc=5.2.0=0
|
||||
- libiconv=1.14=0
|
||||
- libpng=1.6.30=1
|
||||
- libtiff=4.0.6=2
|
||||
- libxml2=2.9.4=0
|
||||
- mkl=2017.0.3=0
|
||||
- numpy=1.12.1=py27_0
|
||||
- olefile=0.44=py27_0
|
||||
- opencv=3.1.0=np112py27_1
|
||||
- openssl=1.0.2l=0
|
||||
- pcre=8.39=1
|
||||
- pillow=3.4.2=py27_0
|
||||
- pip=9.0.1=py27_1
|
||||
- pixman=0.34.0=0
|
||||
- pycparser=2.18=py27_0
|
||||
- python=2.7.13=0
|
||||
- readline=6.2=2
|
||||
- setuptools=36.4.0=py27_1
|
||||
- six=1.10.0=py27_0
|
||||
- sqlite=3.13.0=0
|
||||
- tk=8.5.18=0
|
||||
- wheel=0.29.0=py27_0
|
||||
- xz=5.2.3=0
|
||||
- zlib=1.2.11=0
|
||||
- cycler=0.10.0=py27_0
|
||||
- dbus=1.10.20=0
|
||||
- expat=2.1.0=0
|
||||
- functools32=3.2.3.2=py27_0
|
||||
- gst-plugins-base=1.8.0=0
|
||||
- gstreamer=1.8.0=0
|
||||
- icu=54.1=0
|
||||
- libxcb=1.12=1
|
||||
- matplotlib=2.0.2=np112py27_0
|
||||
- pycairo=1.10.0=py27_0
|
||||
- pyparsing=2.2.0=py27_0
|
||||
- pyqt=5.6.0=py27_2
|
||||
- python-dateutil=2.6.1=py27_0
|
||||
- pytz=2017.2=py27_0
|
||||
- qt=5.6.2=2
|
||||
- sip=4.18=py27_0
|
||||
- subprocess32=3.2.7=py27_0
|
||||
- cuda80=1.0=0
|
||||
- pytorch=0.2.0=py27hc03bea1_4cu80
|
||||
- torchvision=0.1.9=py27hdb88a65_1
|
||||
- pip:
|
||||
- torch==0.2.0.post4
|
||||
prefix: /home/asy/.conda/envs/pytorch
|
||||
|
||||
@ -0,0 +1 @@
|
||||
This directory stores the trained model's network parameters and structure
|
||||
@ -0,0 +1,42 @@
|
||||
import os

# This file lives at <project>/<pkg>/config.py, so two dirname() calls yield
# the project root; compute it once instead of three times.
_PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

# Directory where trained model parameters and structures are saved.
MODEL_STORE_DIR = os.path.join(_PROJECT_ROOT, "model_store")

# Directory where training annotation files are stored.
ANNO_STORE_DIR = os.path.join(_PROJECT_ROOT, "anno_store")

# Directory for training logs.
LOG_DIR = os.path.join(_PROJECT_ROOT, "log")

# Run training on GPU when available.
USE_CUDA = True

# Training hyper-parameters.
TRAIN_BATCH_SIZE = 512
TRAIN_LR = 0.01
END_EPOCH = 10

# Annotation file names for PNet (12x12 input patches).
# NOTE: the "POSTIVE" spelling is kept for backward compatibility with callers.
PNET_POSTIVE_ANNO_FILENAME = "pos_12.txt"
PNET_NEGATIVE_ANNO_FILENAME = "neg_12.txt"
PNET_PART_ANNO_FILENAME = "part_12.txt"
PNET_LANDMARK_ANNO_FILENAME = "landmark_12.txt"

# Annotation file names for RNet (24x24 input patches).
RNET_POSTIVE_ANNO_FILENAME = "pos_24.txt"
RNET_NEGATIVE_ANNO_FILENAME = "neg_24.txt"
RNET_PART_ANNO_FILENAME = "part_24.txt"
RNET_LANDMARK_ANNO_FILENAME = "landmark_24.txt"

# Annotation file names for ONet (48x48 input patches).
ONET_POSTIVE_ANNO_FILENAME = "pos_48.txt"
ONET_NEGATIVE_ANNO_FILENAME = "neg_48.txt"
ONET_PART_ANNO_FILENAME = "part_48.txt"
ONET_LANDMARK_ANNO_FILENAME = "landmark_48.txt"

# Merged image-list files consumed by the per-net training scripts.
PNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_12.txt"
RNET_TRAIN_IMGLIST_FILENAME = "imglist_anno_24.txt"
ONET_TRAIN_IMGLIST_FILENAME = "imglist_anno_48.txt"
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,171 @@
|
||||
import numpy as np
|
||||
import cv2
|
||||
|
||||
|
||||
|
||||
class TrainImageReader:
    """Iterate over a training imdb in fixed-size mini-batches.

    Each iteration yields ``(data, [label, bbox_target, landmark_target])``.
    Iteration stops as soon as a full batch no longer fits, so a trailing
    partial batch is dropped.
    """

    def __init__(self, imdb, im_size, batch_size=128, shuffle=False):
        # dataset and batching configuration
        self.imdb = imdb
        self.batch_size = batch_size
        self.im_size = im_size
        self.shuffle = shuffle

        # iteration state
        self.cur = 0
        self.size = len(imdb)
        self.index = np.arange(self.size)
        self.num_classes = 2

        # current batch contents
        self.batch = None
        self.data = None
        self.label = None

        self.label_names = ['label', 'bbox_target', 'landmark_target']
        self.reset()
        self.get_batch()

    def reset(self):
        """Rewind to the first sample, reshuffling the order if enabled."""
        self.cur = 0
        if self.shuffle:
            np.random.shuffle(self.index)

    def iter_next(self):
        """Return True while another full batch remains."""
        return self.cur + self.batch_size <= self.size

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Load the next batch and return ``(data, labels)``."""
        if not self.iter_next():
            raise StopIteration
        self.get_batch()
        self.cur += self.batch_size
        return self.data, self.label

    def getindex(self):
        """Index of the current batch."""
        return self.cur / self.batch_size

    def getpad(self):
        """Number of samples missing from the final (partial) batch."""
        overshoot = self.cur + self.batch_size - self.size
        return overshoot if overshoot > 0 else 0

    def get_batch(self):
        """Assemble images and targets for samples [cur, cur+batch_size)."""
        start = self.cur
        stop = min(start + self.batch_size, self.size)
        batch_imdb = [self.imdb[self.index[i]] for i in range(start, stop)]
        data, label = get_minibatch(batch_imdb)
        self.data = data['data']
        self.label = [label[name] for name in self.label_names]
|
||||
|
||||
|
||||
|
||||
class TestImageLoader:
    """Iterate over a test imdb, yielding one image batch at a time.

    Only raw image data is produced (no labels); a trailing partial batch
    is dropped, exactly as in TrainImageReader.
    """

    def __init__(self, imdb, batch_size=1, shuffle=False):
        self.imdb = imdb
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.size = len(imdb)
        self.index = np.arange(self.size)

        # iteration state and current batch
        self.cur = 0
        self.data = None
        self.label = None

        self.reset()
        self.get_batch()

    def reset(self):
        """Rewind to the first sample, reshuffling the order if enabled."""
        self.cur = 0
        if self.shuffle:
            np.random.shuffle(self.index)

    def iter_next(self):
        """Return True while another full batch remains."""
        return self.cur + self.batch_size <= self.size

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def next(self):
        """Load and return the next image batch."""
        if not self.iter_next():
            raise StopIteration
        self.get_batch()
        self.cur += self.batch_size
        return self.data

    def getindex(self):
        """Index of the current batch."""
        return self.cur / self.batch_size

    def getpad(self):
        """Number of samples missing from the final (partial) batch."""
        overshoot = self.cur + self.batch_size - self.size
        return overshoot if overshoot > 0 else 0

    def get_batch(self):
        """Read the image for samples [cur, cur+batch_size)."""
        start = self.cur
        stop = min(start + self.batch_size, self.size)
        batch_imdb = [self.imdb[self.index[i]] for i in range(start, stop)]
        self.data = get_testbatch(batch_imdb)['data']
|
||||
|
||||
|
||||
|
||||
|
||||
def get_minibatch(imdb):
    """Load images and training targets for a list of imdb entries.

    Parameters:
    ----------
    imdb: list of dict
        entries with 'image', 'flipped', 'label', 'bbox_target',
        'landmark_target' keys

    Returns:
    -------
    (data, label): data = {'data': images array}; label maps
        'label' / 'bbox_target' / 'landmark_target' to stacked arrays
    """
    images = []
    cls_labels = []
    bbox_targets = []
    landmark_targets = []

    for entry in imdb:
        img = cv2.imread(entry['image'])
        if entry['flipped']:
            # horizontal flip: reverse the width axis
            img = img[:, ::-1, :]

        images.append(img)
        cls_labels.append(entry['label'])
        bbox_targets.append(entry['bbox_target'])
        landmark_targets.append(entry['landmark_target'])

    data = {'data': np.asarray(images)}
    label = {
        'label': np.array(cls_labels),
        'bbox_target': np.vstack(bbox_targets),
        'landmark_target': np.vstack(landmark_targets),
    }
    return data, label
|
||||
|
||||
|
||||
def get_testbatch(imdb):
    """Load the single test image referenced by *imdb*.

    Only batch size 1 is supported; raises AssertionError otherwise.
    """
    assert len(imdb) == 1, "Single batch only"
    return {'data': cv2.imread(imdb[0]['image'])}
|
||||
@ -0,0 +1,40 @@
|
||||
import torchvision.transforms as transforms
|
||||
import torch
|
||||
from torch.autograd.variable import Variable
|
||||
import numpy as np
|
||||
|
||||
# Shared torchvision transform: HWC numpy image -> CHW float tensor.
transform = transforms.ToTensor()


def convert_image_to_tensor(image):
    """Convert an image to a pytorch tensor.

    Parameters:
    ----------
    image: numpy array , h * w * c

    Returns:
    -------
    image_tensor: pytorch.FloatTensor, c * h * w
    """
    # np.float was a deprecated alias for the builtin float and was removed
    # in NumPy 1.24; the builtin gives the same float64 dtype.
    image = image.astype(float)
    return transform(image)
|
||||
|
||||
|
||||
def convert_chwTensor_to_hwcNumpy(tensor):
    """Convert a batch of pytorch images (count * c * h * w) to a numpy
    array of images (count * h * w * c).

    Parameters:
    ----------
    tensor: pytorch Variable or FloatTensor, count * c * h * w

    Returns:
    -------
    numpy array images: count * h * w * c
    """
    if isinstance(tensor, Variable):
        array = tensor.data.numpy()
    elif isinstance(tensor, torch.FloatTensor):
        array = tensor.numpy()
    else:
        raise Exception("covert b*c*h*w tensor to b*h*w*c numpy error.This tensor must have 4 dimension.")
    # move the channel axis from position 1 to the end
    return np.transpose(array, (0, 2, 3, 1))
|
||||
@ -0,0 +1,162 @@
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
class ImageDB(object):
    """Image database built from a plain-text annotation file.

    Each annotation line is whitespace separated:
    ``<image path> [label [bbox_target(4) [landmark_target(10)]]]``.
    """

    def __init__(self, image_annotation_file, prefix_path='', mode='train'):
        # prefix prepended to relative image paths
        self.prefix_path = prefix_path
        self.image_annotation_file = image_annotation_file
        self.classes = ['__background__', 'face']
        self.num_classes = 2
        self.image_set_index = self.load_image_set_index()
        self.num_images = len(self.image_set_index)
        # 'train' loads labels/targets; 'test' loads image paths only
        self.mode = mode

    def load_image_set_index(self):
        """Read the image index (first token of every annotation line).

        Returns:
        -------
        image_set_index: list of str
            relative path of each image
        """
        assert os.path.exists(self.image_annotation_file), 'Path does not exist: {}'.format(self.image_annotation_file)
        with open(self.image_annotation_file, 'r') as f:
            image_set_index = [x.strip().split(' ')[0] for x in f.readlines()]
        return image_set_index

    def load_imdb(self):
        """Get the ground truth image database.

        Returns:
        -------
        gt_imdb: list of dict
            image database with annotations
        """
        gt_imdb = self.load_annotations()
        return gt_imdb

    def real_image_path(self, index):
        """Given an image index, return its full path.

        Parameters:
        ----------
        index: str
            relative path of image
        Returns:
        -------
        image_file: str
            full path of image
        """
        # normalize Windows-style separators
        index = index.replace("\\", "/")

        if not os.path.exists(index):
            image_file = os.path.join(self.prefix_path, index)
        else:
            image_file = index
        if not image_file.endswith('.jpg'):
            image_file = image_file + '.jpg'
        assert os.path.exists(image_file), 'Path does not exist: {}'.format(image_file)
        return image_file

    def load_annotations(self, annotion_type=1):
        """Load annotations into a list of per-image dicts.

        Parameters:
        ----------
        annotion_type: int
            unused; kept (with its original spelling) for backward
            compatibility with existing callers
        Returns:
        -------
        imdb: list of dict
            image database with annotations
        """
        assert os.path.exists(self.image_annotation_file), 'annotations not found at {}'.format(self.image_annotation_file)
        with open(self.image_annotation_file, 'r') as f:
            annotations = f.readlines()

        imdb = []
        for i in range(self.num_images):
            annotation = annotations[i].strip().split(' ')
            index = annotation[0]
            im_path = self.real_image_path(index)
            imdb_ = dict()
            imdb_['image'] = im_path

            if self.mode == 'test':
                # test entries carry only the image path
                pass
            else:
                label = annotation[1]
                imdb_['label'] = int(label)
                imdb_['flipped'] = False
                imdb_['bbox_target'] = np.zeros((4,))
                imdb_['landmark_target'] = np.zeros((10,))
                # 4 extra fields -> bbox only; 14 -> bbox + 10 landmark coords
                if len(annotation[2:]) == 4:
                    bbox_target = annotation[2:6]
                    imdb_['bbox_target'] = np.array(bbox_target).astype(float)
                if len(annotation[2:]) == 14:
                    bbox_target = annotation[2:6]
                    imdb_['bbox_target'] = np.array(bbox_target).astype(float)
                    landmark = annotation[6:]
                    imdb_['landmark_target'] = np.array(landmark).astype(float)
            imdb.append(imdb_)
        return imdb

    def append_flipped_images(self, imdb):
        """Append horizontally flipped copies of every entry to imdb.

        Parameters:
        ----------
        imdb: list of dict
            image database
        Returns:
        -------
        imdb: list of dict
            image database with flipped image annotations added
        """
        # print() keeps this module runnable under both Python 2 and 3
        # (the original used a Python-2-only print statement)
        print('append flipped images to imdb %d' % len(imdb))
        for i in range(len(imdb)):
            imdb_ = imdb[i]
            # mirror the bbox offsets around the vertical axis
            m_bbox = imdb_['bbox_target'].copy()
            m_bbox[0], m_bbox[2] = -m_bbox[2], -m_bbox[0]

            # mirror landmark x coordinates (assumes coordinates are
            # normalized to [0, 1] -- TODO confirm) and swap left/right
            # eyes (points 0/1) and mouth corners (points 3/4)
            landmark_ = imdb_['landmark_target'].copy()
            landmark_ = landmark_.reshape((5, 2))
            landmark_ = np.asarray([(1 - x, y) for (x, y) in landmark_])
            landmark_[[0, 1]] = landmark_[[1, 0]]
            landmark_[[3, 4]] = landmark_[[4, 3]]

            item = {'image': imdb_['image'],
                    'label': imdb_['label'],
                    'bbox_target': m_bbox,
                    'landmark_target': landmark_.reshape((10)),
                    'flipped': True}

            imdb.append(item)
        # every path now appears twice (original + flipped)
        self.image_set_index *= 2
        return imdb
|
||||
|
||||
|
||||
@ -0,0 +1,207 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
def weights_init(m):
    """Xavier-initialize conv/linear weights; biases start at 0.1.

    Intended for use with ``module.apply(weights_init)``; modules other
    than Conv2d/Linear are left untouched.
    """
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        # the trailing-underscore in-place initializers are the current
        # API; xavier_uniform/constant are long-deprecated aliases
        nn.init.xavier_uniform_(m.weight.data)
        nn.init.constant_(m.bias, 0.1)
|
||||
|
||||
|
||||
|
||||
class LossFn:
    """Weighted losses for MTCNN training.

    Sample validity is encoded in gt_label: values >= 0 participate in
    the classification loss, values != 0 in the bbox loss, and values
    == -2 in the landmark loss.
    """

    def __init__(self, cls_factor=1, box_factor=1, landmark_factor=1):
        # per-task loss weights
        self.cls_factor = cls_factor
        self.box_factor = box_factor
        self.land_factor = landmark_factor
        # underlying criteria
        self.loss_cls = nn.BCELoss()
        self.loss_box = nn.MSELoss()
        self.loss_landmark = nn.MSELoss()

    def cls_loss(self, gt_label, pred_label):
        """Binary cross-entropy over samples whose label is 0 or 1."""
        gt = torch.squeeze(gt_label)
        pred = torch.squeeze(pred_label)
        # labels < 0 (part faces / landmark samples) are excluded
        keep = torch.ge(gt, 0)
        return self.loss_cls(torch.masked_select(pred, keep),
                             torch.masked_select(gt, keep)) * self.cls_factor

    def box_loss(self, gt_label, gt_offset, pred_offset):
        """MSE over bbox offsets of samples whose label is non-zero."""
        pred = torch.squeeze(pred_offset)
        gt = torch.squeeze(gt_offset)
        labels = torch.squeeze(gt_label)

        # keep every sample whose label differs from 0
        keep = torch.eq(torch.eq(labels, 0), 0)
        rows = torch.squeeze(torch.nonzero(keep.data))
        return self.loss_box(pred[rows, :], gt[rows, :]) * self.box_factor

    def landmark_loss(self, gt_label, gt_landmark, pred_landmark):
        """MSE over landmarks of samples labelled -2."""
        pred = torch.squeeze(pred_landmark)
        gt = torch.squeeze(gt_landmark)
        labels = torch.squeeze(gt_label)

        keep = torch.eq(labels, -2)
        rows = torch.squeeze(torch.nonzero(keep.data))
        return self.loss_landmark(pred[rows, :], gt[rows, :]) * self.land_factor
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class PNet(nn.Module):
    """Proposal network (PNet): fully-convolutional first MTCNN stage.

    For a 12x12 input it outputs a 1x1 face-probability map and a
    4-channel bounding-box regression map; larger inputs yield dense
    maps.
    """

    def __init__(self, is_train=False, use_cuda=True):
        super(PNet, self).__init__()
        self.is_train = is_train
        self.use_cuda = use_cuda

        # backend
        self.pre_layer = nn.Sequential(
            nn.Conv2d(3, 10, kernel_size=3, stride=1),   # conv1
            nn.PReLU(),                                  # PReLU1
            nn.MaxPool2d(kernel_size=2, stride=2),       # pool1
            nn.Conv2d(10, 16, kernel_size=3, stride=1),  # conv2
            nn.PReLU(),                                  # PReLU2
            nn.Conv2d(16, 32, kernel_size=3, stride=1),  # conv3
            nn.PReLU()                                   # PReLU3
        )
        # face classification head
        self.conv4_1 = nn.Conv2d(32, 1, kernel_size=1, stride=1)
        # bounding box regression head
        self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1, stride=1)
        # landmark localization head (currently unused in forward)
        self.conv4_3 = nn.Conv2d(32, 10, kernel_size=1, stride=1)

        # weight initiation with xavier
        self.apply(weights_init)

    def forward(self, x):
        """Return (face probability map, bbox offset map)."""
        x = self.pre_layer(x)
        # torch.sigmoid replaces the deprecated F.sigmoid and matches the
        # sibling RNet/ONet implementations
        label = torch.sigmoid(self.conv4_1(x))
        offset = self.conv4_2(x)
        # train and eval mode return the same pair; the landmark head is
        # intentionally not evaluated here
        return label, offset
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class RNet(nn.Module):
    """Refinement network (RNet): second MTCNN stage on 24x24 crops.

    Produces a face probability and 4 bbox regression offsets per crop.
    """

    def __init__(self, is_train=False, use_cuda=True):
        super(RNet, self).__init__()
        self.is_train = is_train
        self.use_cuda = use_cuda

        # convolutional backbone
        self.pre_layer = nn.Sequential(
            nn.Conv2d(3, 28, kernel_size=3, stride=1),   # conv1
            nn.PReLU(),                                  # prelu1
            nn.MaxPool2d(kernel_size=3, stride=2),       # pool1
            nn.Conv2d(28, 48, kernel_size=3, stride=1),  # conv2
            nn.PReLU(),                                  # prelu2
            nn.MaxPool2d(kernel_size=3, stride=2),       # pool2
            nn.Conv2d(48, 64, kernel_size=2, stride=1),  # conv3
            nn.PReLU()                                   # prelu3
        )
        # fully-connected trunk
        self.conv4 = nn.Linear(64 * 2 * 2, 128)          # conv4
        self.prelu4 = nn.PReLU()                         # prelu4
        # face classification head
        self.conv5_1 = nn.Linear(128, 1)
        # bounding box regression head
        self.conv5_2 = nn.Linear(128, 4)
        # landmark localization head (unused in forward)
        self.conv5_3 = nn.Linear(128, 10)
        # xavier weight initialization
        self.apply(weights_init)

    def forward(self, x):
        """Return (face probability, bbox offsets) per input crop."""
        features = self.pre_layer(x)
        features = features.view(features.size(0), -1)
        features = self.prelu4(self.conv4(features))

        det = torch.sigmoid(self.conv5_1(features))
        box = self.conv5_2(features)
        # train and eval mode return the same pair
        return det, box
|
||||
|
||||
|
||||
|
||||
|
||||
class ONet(nn.Module):
    ''' ONet: output network, third MTCNN stage on 48x48 crops.

    Produces a face probability, 4 bbox regression offsets and 10
    landmark coordinates per crop. (The docstring previously said
    "RNet" -- copy-paste error.)
    '''

    def __init__(self, is_train=False, use_cuda=True):
        super(ONet, self).__init__()
        self.is_train = is_train
        self.use_cuda = use_cuda
        # backend
        self.pre_layer = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1),    # conv1
            nn.PReLU(),                                   # prelu1
            nn.MaxPool2d(kernel_size=3, stride=2),        # pool1
            nn.Conv2d(32, 64, kernel_size=3, stride=1),   # conv2
            nn.PReLU(),                                   # prelu2
            nn.MaxPool2d(kernel_size=3, stride=2),        # pool2
            nn.Conv2d(64, 64, kernel_size=3, stride=1),   # conv3
            nn.PReLU(),                                   # prelu3
            nn.MaxPool2d(kernel_size=2, stride=2),        # pool3
            nn.Conv2d(64, 128, kernel_size=2, stride=1),  # conv4
            nn.PReLU()                                    # prelu4
        )
        # fully-connected trunk
        self.conv5 = nn.Linear(128 * 2 * 2, 256)          # conv5
        self.prelu5 = nn.PReLU()                          # prelu5
        # face classification head
        self.conv6_1 = nn.Linear(256, 1)
        # bounding box regression head
        self.conv6_2 = nn.Linear(256, 4)
        # landmark localization head
        self.conv6_3 = nn.Linear(256, 10)
        # weight initiation with xavier
        self.apply(weights_init)

    def forward(self, x):
        """Return (face probability, bbox offsets, landmarks) per crop."""
        x = self.pre_layer(x)
        x = x.view(x.size(0), -1)
        x = self.conv5(x)
        x = self.prelu5(x)

        det = torch.sigmoid(self.conv6_1(x))
        box = self.conv6_2(x)
        landmark = self.conv6_3(x)
        # train and eval mode return the same triple
        return det, box, landmark
|
||||
@ -0,0 +1,42 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def torch_nms(dets, thresh, mode="Union"):
    """
    greedily select boxes with high confidence
    keep boxes overlap <= thresh
    rule out overlap > thresh
    :param dets: [[x1, y1, x2, y2 score]]
    :param thresh: retain overlap <= thresh
    :param mode: "Union" (IoU) or "Minimum" (overlap over smaller area)
    :return: indexes to keep
    :raises ValueError: on an unknown mode (previously this fell through
        to a NameError on ``ovr``)
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # +1 because coordinates are inclusive pixel indices
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # process boxes in descending score order
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if mode == "Union":
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        elif mode == "Minimum":
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        else:
            raise ValueError("Unknown nms mode: %s" % mode)

        # retain boxes overlapping the current best by at most thresh
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
|
||||
@ -0,0 +1,2 @@
|
||||
import numpy as np
|
||||
|
||||
@ -0,0 +1,101 @@
|
||||
import numpy as np
|
||||
|
||||
def IoU(box, boxes):
    """Compute IoU between a detect box and gt boxes.

    Parameters:
    ----------
    box: numpy array , shape (5, ): x1, y1, x2, y2, score
        input box
    boxes: numpy array, shape (n, 4): x1, y1, x2, y2
        input ground truth boxes

    Returns:
    -------
    ovr: numpy.array, shape (n, )
        IoU
    """
    # areas use inclusive pixel coordinates, hence the +1
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)

    # corners of the intersection rectangles
    ix1 = np.maximum(box[0], boxes[:, 0])
    iy1 = np.maximum(box[1], boxes[:, 1])
    ix2 = np.minimum(box[2], boxes[:, 2])
    iy2 = np.minimum(box[3], boxes[:, 3])

    # clamp to zero when the boxes do not intersect
    iw = np.maximum(0, ix2 - ix1 + 1)
    ih = np.maximum(0, iy2 - iy1 + 1)

    inter = iw * ih
    return np.true_divide(inter, box_area + areas - inter)
|
||||
|
||||
|
||||
def convert_to_square(bbox):
    """Convert bboxes to squares centered on the original boxes.

    Parameters:
    ----------
    bbox: numpy array , shape n x 5
        input bbox

    Returns:
    -------
    square bbox (same shape; the score column is untouched)
    """
    square_bbox = bbox.copy()

    # inclusive-coordinate heights and widths
    heights = bbox[:, 3] - bbox[:, 1] + 1
    widths = bbox[:, 2] - bbox[:, 0] + 1
    side = np.maximum(heights, widths)

    # recenter each box and expand the short dimension to `side`
    square_bbox[:, 0] = bbox[:, 0] + widths * 0.5 - side * 0.5
    square_bbox[:, 1] = bbox[:, 1] + heights * 0.5 - side * 0.5
    square_bbox[:, 2] = square_bbox[:, 0] + side - 1
    square_bbox[:, 3] = square_bbox[:, 1] + side - 1
    return square_bbox
|
||||
|
||||
|
||||
def nms(dets, thresh, mode="Union"):
    """
    greedily select boxes with high confidence
    keep boxes overlap <= thresh
    rule out overlap > thresh
    :param dets: [[x1, y1, x2, y2 score]]
    :param thresh: retain overlap <= thresh
    :param mode: "Union" (IoU) or "Minimum" (overlap over smaller area)
    :return: indexes to keep
    :raises ValueError: on an unknown mode (previously this fell through
        to a NameError on ``ovr``)
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    # +1 because coordinates are inclusive pixel indices
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # process boxes in descending score order
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if mode == "Union":
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
        elif mode == "Minimum":
            ovr = inter / np.minimum(areas[i], areas[order[1:]])
        else:
            raise ValueError("Unknown nms mode: %s" % mode)

        # retain boxes overlapping the current best by at most thresh
        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep
|
||||
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,141 @@
|
||||
from matplotlib.patches import Circle
|
||||
|
||||
|
||||
def vis_two(im_array, dets1, dets2, thresh=0.9):
    """Visualize detection results before and after calibration.

    Parameters:
    ----------
    im_array: numpy.ndarray, shape(1, c, h, w)
        test image in rgb
    dets1: numpy.ndarray([[x1 y1 x2 y2 score x1..y5]])
        detection results before calibration
    dets2: numpy.ndarray([[x1 y1 x2 y2 score x1..y5]])
        detection results after calibration
    thresh: float
        only boxes with score > thresh are drawn

    Returns:
    -------
    """
    import matplotlib.pyplot as plt

    def _draw(dets):
        # draw every detection above `thresh` plus its 5 landmark points
        for i in range(dets.shape[0]):
            bbox = dets[i, :4]
            landmarks = dets[i, 5:]
            score = dets[i, 4]
            if score > thresh:
                rect = plt.Rectangle((bbox[0], bbox[1]),
                                     bbox[2] - bbox[0],
                                     bbox[3] - bbox[1], fill=False,
                                     edgecolor='red', linewidth=0.7)
                plt.gca().add_patch(rect)
                points = landmarks.reshape((5, 2))
                for j in range(5):
                    plt.scatter(points[j, 0], points[j, 1], c='yellow',
                                linewidths=0.1, marker='x', s=5)

    plt.figure()
    plt.subplot(121)
    plt.imshow(im_array)
    _draw(dets1)

    plt.subplot(122)
    plt.imshow(im_array)
    # BUG FIX: the second panel previously read its landmarks from dets1,
    # so the "after calibration" landmarks were never shown
    _draw(dets2)
    plt.show()
|
||||
|
||||
|
||||
def vis_face(im_array, dets, landmarks=None):
    """Visualize detected faces, optionally with their landmarks.

    (The previous docstring was copied from vis_two and documented
    parameters this function does not have.)

    Parameters:
    ----------
    im_array: numpy.ndarray, shape(1, c, h, w)
        test image in rgb
    dets: numpy.ndarray([[x1 y1 x2 y2 score]])
        detection results
    landmarks: numpy.ndarray, shape (n, 10), or None
        per-detection 5-point landmarks flattened as x1,y1,...,x5,y5

    Returns:
    -------
    """
    import pylab

    figure = pylab.figure()
    pylab.imshow(im_array)
    figure.suptitle('DFace Detector', fontsize=20)

    # one yellow rectangle per detection
    for i in range(dets.shape[0]):
        bbox = dets[i, :4]
        rect = pylab.Rectangle((bbox[0], bbox[1]),
                               bbox[2] - bbox[0],
                               bbox[3] - bbox[1], fill=False,
                               edgecolor='yellow', linewidth=0.9)
        pylab.gca().add_patch(rect)

    if landmarks is not None:
        # red translucent circle on each of the 5 landmark points
        for i in range(landmarks.shape[0]):
            points = landmarks[i, :].reshape((5, 2))
            for j in range(5):
                cir = Circle(xy=(points[j, 0], points[j, 1]),
                             radius=2, alpha=0.4, color="red")
                pylab.gca().add_patch(cir)

    pylab.show()
|
||||
@ -0,0 +1,35 @@
|
||||
|
||||
import os
|
||||
import numpy.random as npr
|
||||
import numpy as np
|
||||
|
||||
def assemble_data(output_file, anno_file_list=[]):
    """Merge (and subsample) annotation files into one image-list file.

    Parameters:
    ----------
    output_file: str
        path of the combined annotation file (recreated if it exists)
    anno_file_list: list of str
        annotation files to merge; never mutated

    Returns:
    -------
    int: number of lines written from the LAST input file, or 0 when
        anno_file_list is empty. NOTE(review): the counter has always
        been reset per file, so the return value is not the grand total
        -- preserved for backward compatibility.
    """
    if len(anno_file_list) == 0:
        return 0

    # start from a clean output file
    if os.path.exists(output_file):
        os.remove(output_file)

    chose_count = 0
    for anno_file in anno_file_list:
        with open(anno_file, 'r') as f:
            anno_lines = f.readlines()

        # cap contribution of very large files
        base_num = 250000

        # NOTE(review): npr.choice with replace=True can duplicate lines
        # and drop others even when size == len(anno_lines); confirm this
        # is intended before changing it
        if len(anno_lines) > base_num * 3:
            idx_keep = npr.choice(len(anno_lines), size=base_num * 3, replace=True)
        elif len(anno_lines) > 100000:
            idx_keep = npr.choice(len(anno_lines), size=len(anno_lines), replace=True)
        else:
            # small files keep every line, in random order
            idx_keep = np.arange(len(anno_lines))
            np.random.shuffle(idx_keep)

        chose_count = 0
        with open(output_file, 'a+') as f:
            for idx in idx_keep:
                f.write(anno_lines[idx])
                chose_count += 1

    return chose_count
|
||||
@ -0,0 +1,25 @@
|
||||
import os
import config
import assemble as assemble


if __name__ == '__main__':
    # gather the four ONet annotation files (pos / part / neg / landmark)
    anno_list = []

    net_landmark_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_LANDMARK_ANNO_FILENAME)
    net_postive_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_POSTIVE_ANNO_FILENAME)
    net_part_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_PART_ANNO_FILENAME)
    net_neg_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_NEGATIVE_ANNO_FILENAME)

    anno_list.append(net_postive_file)
    anno_list.append(net_part_file)
    anno_list.append(net_neg_file)
    anno_list.append(net_landmark_file)

    # merged image list consumed by ONet training
    imglist_filename = config.ONET_TRAIN_IMGLIST_FILENAME
    anno_dir = config.ANNO_STORE_DIR
    imglist_file = os.path.join(anno_dir, imglist_filename)

    chose_count = assemble.assemble_data(imglist_file, anno_list)
    # print() works under Python 2 and 3; the message previously said
    # "PNet" although this script assembles the ONet image list
    print("ONet train annotation result file path:%s" % imglist_file)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue