commit
fe46b77edf
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,186 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
|
||||
from arch.base_module import SNConv, SNConvTranspose, ResBlock
|
||||
|
||||
|
||||
class Encoder(nn.Layer):
    """Convolutional encoder that exposes every intermediate feature map.

    The input is replicate-padded by 3 pixels on each side, fed to a 7x7
    ``SNConv`` stem, then to three stride-2 3x3 ``SNConv`` stages and finally
    to a sequential stack of ``conv_block_num`` ``ResBlock`` layers.

    Args:
        name: Prefix used to build the ``name`` given to each sub-layer.
        in_channels: ``in_channels`` of the stem convolution.
        encode_dim: ``out_channels`` of the stem; the three down stages are
            configured with ``encode_dim * 2``, ``encode_dim * 4`` and
            ``encode_dim * 4`` output channels.
        use_bias: Forwarded to every ``SNConv`` and ``ResBlock``.
        norm_layer: Forwarded to every ``SNConv`` and ``ResBlock``.
        act: Forwarded to every ``SNConv``.
        act_attr: Forwarded to every ``SNConv``.
        conv_block_dropout: Forwarded to ``ResBlock`` as ``use_dropout``.
        conv_block_num: Number of ``ResBlock`` layers to stack.
        conv_block_dilation: Forwarded to ``ResBlock`` as ``use_dilation``.
    """

    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
                 act, act_attr, conv_block_dropout, conv_block_num,
                 conv_block_dilation):
        super(Encoder, self).__init__()
        # Padding of 3 pairs with the 7x7 stem kernel, which is built without
        # a ``padding`` argument of its own.
        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
        self._in_conv = SNConv(
            name=name + "_in_conv",
            in_channels=in_channels,
            out_channels=encode_dim,
            kernel_size=7,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down1 = SNConv(
            name=name + "_down1",
            in_channels=encode_dim,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down2 = SNConv(
            name=name + "_down2",
            in_channels=encode_dim * 2,
            out_channels=encode_dim * 4,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down3 = SNConv(
            name=name + "_down3",
            in_channels=encode_dim * 4,
            out_channels=encode_dim * 4,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        conv_blocks = [
            ResBlock(
                name="{}_conv_block_{}".format(name, i),
                channels=encode_dim * 4,
                norm_layer=norm_layer,
                use_dropout=conv_block_dropout,
                use_dilation=conv_block_dilation,
                use_bias=use_bias) for i in range(conv_block_num)
        ]
        self._conv_blocks = nn.Sequential(*conv_blocks)

    def forward(self, x):
        """Encode ``x`` and return all intermediate activations.

        Returns:
            dict with keys ``"in_conv"``, ``"down1"``, ``"down2"``,
            ``"down3"`` and ``"res_blocks"``, each holding the output of the
            corresponding stage.
        """
        out_dict = dict()
        x = self._pad2d(x)
        # Sub-layers are invoked through ``__call__`` (not ``.forward``) so
        # that any forward pre/post hooks registered on them are honoured.
        out_dict["in_conv"] = self._in_conv(x)
        out_dict["down1"] = self._down1(out_dict["in_conv"])
        out_dict["down2"] = self._down2(out_dict["down1"])
        out_dict["down3"] = self._down3(out_dict["down2"])
        out_dict["res_blocks"] = self._conv_blocks(out_dict["down3"])
        return out_dict
|
||||
|
||||
|
||||
class EncoderUnet(nn.Layer):
    """U-Net style encoder with four down stages and two up stages.

    The input is replicate-padded by 3 pixels on each side and passed through
    a 7x7 ``SNConv`` stem and four stride-2 3x3 ``SNConv`` stages. Two
    stride-2 ``SNConvTranspose`` stages follow; the second one consumes the
    channel-wise concatenation of ``down3`` and ``up1``.

    Args:
        name: Prefix used to build the ``name`` given to each sub-layer.
        in_channels: ``in_channels`` of the stem convolution.
        encode_dim: ``out_channels`` of the stem; the down stages and ``up1``
            are configured with ``encode_dim * 2`` output channels, ``up2``
            with ``encode_dim * 4``.
        use_bias: Forwarded to every sub-layer.
        norm_layer: Forwarded to every sub-layer.
        act: Forwarded to every sub-layer.
        act_attr: Forwarded to every sub-layer.
    """

    def __init__(self, name, in_channels, encode_dim, use_bias, norm_layer,
                 act, act_attr):
        super(EncoderUnet, self).__init__()
        # Padding of 3 pairs with the 7x7 stem kernel, which is built without
        # a ``padding`` argument of its own.
        self._pad2d = paddle.nn.Pad2D([3, 3, 3, 3], mode="replicate")
        self._in_conv = SNConv(
            name=name + "_in_conv",
            in_channels=in_channels,
            out_channels=encode_dim,
            kernel_size=7,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down1 = SNConv(
            name=name + "_down1",
            in_channels=encode_dim,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down2 = SNConv(
            name=name + "_down2",
            in_channels=encode_dim * 2,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down3 = SNConv(
            name=name + "_down3",
            in_channels=encode_dim * 2,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._down4 = SNConv(
            name=name + "_down4",
            in_channels=encode_dim * 2,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        self._up1 = SNConvTranspose(
            name=name + "_up1",
            in_channels=encode_dim * 2,
            out_channels=encode_dim * 2,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)
        # ``up2`` takes concat(down3, up1), hence twice the channel count.
        self._up2 = SNConvTranspose(
            name=name + "_up2",
            in_channels=encode_dim * 4,
            out_channels=encode_dim * 4,
            kernel_size=3,
            stride=2,
            padding=1,
            use_bias=use_bias,
            norm_layer=norm_layer,
            act=act,
            act_attr=act_attr)

    def forward(self, x):
        """Encode ``x`` and return all intermediate activations.

        Returns:
            dict with keys ``'in_conv'``, ``'down1'`` .. ``'down4'``,
            ``'up1'``, ``'up2'`` and ``'concat'``; ``'concat'`` is ``down2``
            and ``up2`` concatenated along axis 1.
        """
        output_dict = dict()
        x = self._pad2d(x)
        # Sub-layers are invoked through ``__call__`` (not ``.forward``) so
        # that any forward pre/post hooks registered on them are honoured.
        output_dict['in_conv'] = self._in_conv(x)
        output_dict['down1'] = self._down1(output_dict['in_conv'])
        output_dict['down2'] = self._down2(output_dict['down1'])
        output_dict['down3'] = self._down3(output_dict['down2'])
        output_dict['down4'] = self._down4(output_dict['down3'])
        output_dict['up1'] = self._up1(output_dict['down4'])
        output_dict['up2'] = self._up2(
            paddle.concat(
                (output_dict['down3'], output_dict['up1']), axis=1))
        output_dict['concat'] = paddle.concat(
            (output_dict['down2'], output_dict['up2']), axis=1)
        return output_dict
|
@ -0,0 +1,150 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import paddle
|
||||
import paddle.nn as nn
|
||||
import paddle.nn.functional as F
|
||||
|
||||
|
||||
def normal_(x, mean=0., std=1.):
    """Overwrite ``x`` in place with samples from ``paddle.normal``.

    Args:
        x: Object exposing ``shape`` and ``set_value`` (here: a parameter).
        mean: Mean passed to ``paddle.normal``.
        std: Standard deviation passed to ``paddle.normal``.

    Returns:
        The same ``x`` object, after its value has been replaced.
    """
    x.set_value(paddle.normal(mean, std, shape=x.shape))
    return x
|
||||
|
||||
|
||||
class SpectralNorm(object):
    """Forward pre-hook that divides a layer weight by an estimate of sigma.

    ``apply`` stores the raw weight as ``<name>_orig`` and two vectors
    ``<name>_u`` / ``<name>_v`` on the module. On every call the hook sets
    ``<name>`` to ``<name>_orig / sigma`` where ``sigma = u . (W v)`` and
    ``W`` is the weight reshaped to a matrix; ``u`` and ``v`` are refined by
    power iteration while the module is in training mode.
    """

    def __init__(self, name='weight', n_power_iterations=1, dim=0, eps=1e-12):
        """Store the hook configuration.

        Raises:
            ValueError: If ``n_power_iterations`` is not positive.
        """
        self.name = name
        self.dim = dim
        if n_power_iterations <= 0:
            message = ('Expected n_power_iterations to be positive, but '
                       'got n_power_iterations={}'.format(n_power_iterations))
            raise ValueError(message)
        self.n_power_iterations = n_power_iterations
        self.eps = eps

    def reshape_weight_to_matrix(self, weight):
        """Return ``weight`` as a 2-D matrix whose rows follow ``self.dim``."""
        if self.dim == 0:
            weight_mat = weight
        else:
            # Move ``self.dim`` to the front, keeping the other axes in order.
            remaining = [d for d in range(weight.dim()) if d != self.dim]
            weight_mat = weight.transpose([self.dim] + remaining)

        return weight_mat.reshape([weight_mat.shape[0], -1])

    def compute_weight(self, module, do_power_iteration):
        """Return ``<name>_orig`` divided by the current sigma estimate.

        When ``do_power_iteration`` is true, ``<name>_u`` and ``<name>_v`` are
        first updated in place (under ``no_grad``) by
        ``self.n_power_iterations`` rounds of power iteration.
        """
        weight = getattr(module, self.name + '_orig')
        u = getattr(module, self.name + '_u')
        v = getattr(module, self.name + '_v')
        weight_mat = self.reshape_weight_to_matrix(weight)

        if do_power_iteration:
            with paddle.no_grad():
                for _ in range(self.n_power_iterations):
                    # v <- normalize(W^T u)
                    next_v = F.normalize(
                        paddle.matmul(
                            weight_mat,
                            u,
                            transpose_x=True,
                            transpose_y=False),
                        axis=0,
                        epsilon=self.eps)
                    v.set_value(next_v)

                    # u <- normalize(W v)
                    next_u = F.normalize(
                        paddle.matmul(weight_mat, v),
                        axis=0,
                        epsilon=self.eps)
                    u.set_value(next_u)
                if self.n_power_iterations > 0:
                    # Work on copies below so the stored vectors updated via
                    # set_value above are not the tensors used for sigma.
                    u = u.clone()
                    v = v.clone()

        sigma = paddle.dot(u, paddle.mv(weight_mat, v))
        return weight / sigma

    def remove(self, module):
        """Replace the hook-managed attributes with one normalized parameter.

        NOTE(review): this does not unregister the forward pre-hook itself;
        confirm callers do that separately.
        NOTE(review): ``add_parameter`` is given a detached tensor; verify it
        accepts a non-Parameter value.
        """
        with paddle.no_grad():
            weight = self.compute_weight(module, do_power_iteration=False)
        for suffix in ('', '_u', '_v', '_orig'):
            delattr(module, self.name + suffix)

        module.add_parameter(self.name, weight.detach())

    def __call__(self, module, inputs):
        """Hook entry point: refresh ``module.<name>`` before the forward."""
        normalized = self.compute_weight(
            module, do_power_iteration=module.training)
        setattr(module, self.name, normalized)

    @staticmethod
    def apply(module, name, n_power_iterations, dim, eps):
        """Install a ``SpectralNorm`` hook for ``module.<name>``.

        Raises:
            RuntimeError: If a ``SpectralNorm`` hook for ``name`` is already
                registered on ``module``.

        Returns:
            The newly registered ``SpectralNorm`` instance.
        """
        for hook in module._forward_pre_hooks.values():
            if isinstance(hook, SpectralNorm) and hook.name == name:
                raise RuntimeError(
                    "Cannot register two spectral_norm hooks on "
                    "the same parameter {}".format(name))

        fn = SpectralNorm(name, n_power_iterations, dim, eps)
        weight = module._parameters[name]

        with paddle.no_grad():
            weight_mat = fn.reshape_weight_to_matrix(weight)
            h, w = weight_mat.shape

            # Randomly initialize u and v, then scale them to unit norm.
            u = module.create_parameter([h])
            u = normal_(u, 0., 1.)
            v = module.create_parameter([w])
            v = normal_(v, 0., 1.)
            u = F.normalize(u, axis=0, epsilon=fn.eps)
            v = F.normalize(v, axis=0, epsilon=fn.eps)

        # Delete fn.name from the parameters, otherwise the attribute of the
        # same name cannot be set below.
        del module._parameters[fn.name]
        module.add_parameter(fn.name + "_orig", weight)
        # The weight still has to exist under fn.name because other code may
        # assume it does (e.g. weight initialization). Assigning the Parameter
        # directly would register it as a parameter again, so ``weight * 1.0``
        # is stored as a plain attribute instead.
        setattr(module, fn.name, weight * 1.0)
        module.register_buffer(fn.name + "_u", u)
        module.register_buffer(fn.name + "_v", v)

        module.register_forward_pre_hook(fn)
        return fn
|
||||
|
||||
|
||||
def spectral_norm(module,
                  name='weight',
                  n_power_iterations=1,
                  eps=1e-12,
                  dim=None):
    """Attach a ``SpectralNorm`` hook to ``module`` and return ``module``.

    Args:
        module: Layer whose parameter ``name`` should be normalized.
        name: Name of the weight parameter.
        n_power_iterations: Power-iteration rounds per forward call.
        eps: Epsilon used when normalizing the ``u`` / ``v`` vectors.
        dim: Weight axis treated as the matrix row axis. When ``None`` it
            defaults to 1 for transposed convolutions and ``nn.Linear`` and
            to 0 for every other layer type.
    """
    if dim is None:
        dim_one_layers = (nn.Conv1DTranspose, nn.Conv2DTranspose,
                          nn.Conv3DTranspose, nn.Linear)
        dim = 1 if isinstance(module, dim_one_layers) else 0
    SpectralNorm.apply(module, name, n_power_iterations, dim, eps)
    return module
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,54 @@
|
||||
Global:
|
||||
output_num: 10
|
||||
output_dir: output_data
|
||||
use_gpu: false
|
||||
image_height: 32
|
||||
image_width: 320
|
||||
TextDrawer:
|
||||
fonts:
|
||||
en: fonts/en_standard.ttf
|
||||
ch: fonts/ch_standard.ttf
|
||||
ko: fonts/ko_standard.ttf
|
||||
Predictor:
|
||||
method: StyleTextRecPredictor
|
||||
algorithm: StyleTextRec
|
||||
scale: 0.00392156862745098
|
||||
mean:
|
||||
- 0.5
|
||||
- 0.5
|
||||
- 0.5
|
||||
std:
|
||||
- 0.5
|
||||
- 0.5
|
||||
- 0.5
|
||||
expand_result: false
|
||||
bg_generator:
|
||||
pretrain: style_text_models/bg_generator
|
||||
module_name: bg_generator
|
||||
generator_type: BgGeneratorWithMask
|
||||
encode_dim: 64
|
||||
norm_layer: null
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
output_factor: 1.05
|
||||
text_generator:
|
||||
pretrain: style_text_models/text_generator
|
||||
module_name: text_generator
|
||||
generator_type: TextGenerator
|
||||
encode_dim: 64
|
||||
norm_layer: InstanceNorm2D
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
fusion_generator:
|
||||
pretrain: style_text_models/fusion_generator
|
||||
module_name: fusion_generator
|
||||
generator_type: FusionGeneratorSimple
|
||||
encode_dim: 64
|
||||
norm_layer: null
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
Writer:
|
||||
method: SimpleWriter
|
@ -0,0 +1,64 @@
|
||||
Global:
|
||||
output_num: 10
|
||||
output_dir: output_data
|
||||
use_gpu: false
|
||||
image_height: 32
|
||||
image_width: 320
|
||||
standard_font: fonts/en_standard.ttf
|
||||
TextDrawer:
|
||||
fonts:
|
||||
en: fonts/en_standard.ttf
|
||||
ch: fonts/ch_standard.ttf
|
||||
ko: fonts/ko_standard.ttf
|
||||
StyleSampler:
|
||||
method: DatasetSampler
|
||||
image_home: examples
|
||||
label_file: examples/image_list.txt
|
||||
with_label: true
|
||||
CorpusGenerator:
|
||||
method: FileCorpus
|
||||
language: ch
|
||||
corpus_file: examples/corpus/example.txt
|
||||
Predictor:
|
||||
method: StyleTextRecPredictor
|
||||
algorithm: StyleTextRec
|
||||
scale: 0.00392156862745098
|
||||
mean:
|
||||
- 0.5
|
||||
- 0.5
|
||||
- 0.5
|
||||
std:
|
||||
- 0.5
|
||||
- 0.5
|
||||
- 0.5
|
||||
expand_result: false
|
||||
bg_generator:
|
||||
pretrain: models/style_text_rec/bg_generator
|
||||
module_name: bg_generator
|
||||
generator_type: BgGeneratorWithMask
|
||||
encode_dim: 64
|
||||
norm_layer: null
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
output_factor: 1.05
|
||||
text_generator:
|
||||
pretrain: models/style_text_rec/text_generator
|
||||
module_name: text_generator
|
||||
generator_type: TextGenerator
|
||||
encode_dim: 64
|
||||
norm_layer: InstanceNorm2D
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
fusion_generator:
|
||||
pretrain: models/style_text_rec/fusion_generator
|
||||
module_name: fusion_generator
|
||||
generator_type: FusionGeneratorSimple
|
||||
encode_dim: 64
|
||||
norm_layer: null
|
||||
conv_block_num: 4
|
||||
conv_block_dropout: false
|
||||
conv_block_dilation: true
|
||||
Writer:
|
||||
method: SimpleWriter
|
After Width: | Height: | Size: 168 KiB |
After Width: | Height: | Size: 201 KiB |
After Width: | Height: | Size: 68 KiB |
After Width: | Height: | Size: 2.2 KiB |
After Width: | Height: | Size: 122 KiB |
After Width: | Height: | Size: 125 KiB |
@ -0,0 +1,66 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import random
|
||||
|
||||
from utils.logging import get_logger
|
||||
|
||||
|
||||
class FileCorpus(object):
    """Corpus generator that serves the lines of a text file in random order.

    The file named by ``config["CorpusGenerator"]["corpus_file"]`` is read
    once; its lines are shuffled and handed out one per ``generate`` call,
    reshuffling whenever the list has been exhausted.
    """

    def __init__(self, config):
        self.logger = get_logger()
        self.logger.info("using FileCorpus")

        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

        corpus_file = config["CorpusGenerator"]["corpus_file"]
        self.language = config["CorpusGenerator"]["language"]
        self.corpus_list = self._read_corpus(corpus_file)
        assert len(self.corpus_list) > 0, \
            "corpus file {} contains no lines".format(corpus_file)
        random.shuffle(self.corpus_list)
        self.index = 0

    @staticmethod
    def _read_corpus(corpus_file):
        """Return the lines of ``corpus_file`` (decoded as UTF-8) as a list.

        Only the empty string produced by a trailing newline is dropped, so a
        final line without a newline terminator is kept.
        """
        # Explicit encoding: the corpus may hold non-ASCII text and the
        # platform default encoding is not guaranteed to be UTF-8.
        with open(corpus_file, 'r', encoding='utf-8') as f:
            lines = f.read().split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        return lines

    def generate(self, corpus_length=0):
        """Return ``(language, text)`` for the next corpus line.

        Args:
            corpus_length: When non-zero, the line is truncated to at most
                this many characters; a warning is logged if the line is
                shorter than requested.
        """
        if self.index >= len(self.corpus_list):
            self.index = 0
            random.shuffle(self.corpus_list)
        corpus = self.corpus_list[self.index]
        if corpus_length != 0:
            corpus = corpus[0:corpus_length]
            if corpus_length > len(corpus):
                self.logger.warning("generated corpus is shorter than expected.")
        self.index += 1
        return self.language, corpus
|
||||
|
||||
|
||||
class EnNumCorpus(object):
    """Corpus generator producing random strings of digits and ASCII letters."""

    def __init__(self, config):
        self.logger = get_logger()
        self.logger.info("using NumberCorpus")
        self.num_list = "0123456789"
        self.en_char_list = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        self.height = config["Global"]["image_height"]
        self.max_width = config["Global"]["image_width"]

    def generate(self, corpus_length=0):
        """Return ``("en", text)`` with ``corpus_length`` random characters.

        Each character is a letter when ``random.random() < 0.2`` and a digit
        otherwise. A ``corpus_length`` of 0 selects a random length between 5
        and 15 inclusive.
        """
        if corpus_length == 0:
            corpus_length = random.randint(5, 15)
        picked = []
        for _ in range(corpus_length):
            # Draw order (random() first, then choice()) matters for
            # reproducibility under a fixed seed.
            if random.random() < 0.2:
                source = self.en_char_list
            else:
                source = self.num_list
            picked.append("{}".format(random.choice(source)))
        return "en", "".join(picked)
|
@ -0,0 +1,115 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import numpy as np
|
||||
import cv2
|
||||
import math
|
||||
import paddle
|
||||
|
||||
from arch import style_text_rec
|
||||
from utils.sys_funcs import check_gpu
|
||||
from utils.logging import get_logger
|
||||
|
||||
|
||||
class StyleTextRecPredictor(object):
    """Runs the StyleTextRec generator on a style image and a text image.

    Images are normalized with the configured ``scale`` / ``mean`` / ``std``,
    resized to the configured height, right-padded with zeros to the
    configured width, and the generator outputs are converted back to uint8
    images cropped around the detected text.
    """

    def __init__(self, config):
        algorithm = config['Predictor']['algorithm']
        assert algorithm in ["StyleTextRec"
                             ], "Generator {} not supported.".format(algorithm)
        use_gpu = config["Global"]['use_gpu']
        check_gpu(use_gpu)
        self.logger = get_logger()
        self.generator = getattr(style_text_rec, algorithm)(config)

        global_cfg = config["Global"]
        self.height = global_cfg["image_height"]
        self.width = global_cfg["image_width"]

        predictor_cfg = config["Predictor"]
        self.scale = predictor_cfg["scale"]
        self.mean = predictor_cfg["mean"]
        self.std = predictor_cfg["std"]
        self.expand_result = predictor_cfg["expand_result"]

    def predict(self, style_input, text_input):
        """Synthesize text in the style of ``style_input``.

        Returns:
            dict with uint8 images under ``"fake_fusion"``, ``"fake_text"``,
            ``"fake_sk"`` and ``"fake_bg"``; all four are cropped to the same
            box when a text boundary is found in ``fake_text``.
        """
        style_input = self.rep_style_input(style_input, text_input)
        tensor_style_input = self.preprocess(style_input)
        tensor_text_input = self.preprocess(text_input)
        style_text_result = self.generator.forward(tensor_style_input,
                                                   tensor_text_input)

        result_keys = ("fake_fusion", "fake_text", "fake_sk", "fake_bg")
        results = {
            key: self.postprocess(style_text_result[key])
            for key in result_keys
        }

        bbox = self.get_text_boundary(results["fake_text"])
        if bbox:
            left, right, top, bottom = bbox
            results = {
                key: image[top:bottom, left:right, :]
                for key, image in results.items()
            }
        return results

    def preprocess(self, img):
        """Normalize, resize and pad an HWC image into a 1xCxHxW tensor."""
        img = (img.astype('float32') * self.scale - self.mean) / self.std
        img_height, img_width, channel = img.shape
        assert channel == 3, "Please use an rgb image."
        ratio = img_width / float(img_height)

        # Keep the aspect ratio at the target height, capped at self.width.
        scaled_w = math.ceil(self.height * ratio)
        resized_w = self.width if scaled_w > self.width else int(scaled_w)
        img = cv2.resize(img, (resized_w, self.height))

        # Left-align the resized image on a zero canvas of the full width.
        canvas = np.zeros([self.height, self.width, 3]).astype('float32')
        canvas[:, 0:resized_w, :] = img
        batched = canvas.transpose((2, 0, 1))[np.newaxis, :, :, :]
        return paddle.to_tensor(batched)

    def postprocess(self, tensor):
        """Convert the first item of a generator output to a uint8 HWC image."""
        img = tensor.numpy()[0].transpose((1, 2, 0))
        # Inverse of the normalization applied in ``preprocess``.
        img = (img * self.std + self.mean) / self.scale
        img = np.clip(img, 0.0, 255.0)
        return img.astype('uint8')

    def rep_style_input(self, style_input, text_input):
        """Tile the style image horizontally so it covers the text image.

        The repeat count comes from the ratio of the two aspect ratios
        (with a 1.2 margin); the tiled image is then cut to the width that
        matches the configured output aspect ratio.
        """
        text_ratio = text_input.shape[1] / text_input.shape[0]
        style_ratio = style_input.shape[1] / style_input.shape[0]
        rep_num = int(1.2 * text_ratio / style_ratio) + 1
        tiled = np.tile(style_input, reps=[1, rep_num, 1])
        max_width = int(self.width / self.height * tiled.shape[0])
        return tiled[:, :max_width, :]

    def get_text_boundary(self, text_img):
        """Locate the text in ``text_img`` from its Canny edges.

        Returns:
            ``[left, right, top, bottom]`` extended by a 3 pixel margin and
            clamped to the image, or ``None`` when no edge is found.
        """
        img_height, img_width = text_img.shape[0], text_img.shape[1]
        bounder = 3
        edges = cv2.Canny(text_img, 10, 20)
        edge_cols = np.where(edges.sum(axis=0) > 0)[0]
        edge_rows = np.where(edges.sum(axis=1) > 0)[0]
        if len(edge_cols) == 0 or len(edge_rows) == 0:
            return None
        left = max(edge_cols[0] - bounder, 0)
        right = min(edge_cols[-1] + bounder, img_width)
        top = max(edge_rows[0] - bounder, 0)
        bottom = min(edge_rows[-1] + bounder, img_height)
        return [left, right, top, bottom]
|
@ -0,0 +1,62 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import numpy as np
|
||||
import random
|
||||
import cv2
|
||||
|
||||
|
||||
class DatasetSampler(object):
    """Samples style images (and optional labels) listed in a label file.

    Each line of ``config["StyleSampler"]["label_file"]`` is either
    ``<relative path>\\t<label>`` (when ``with_label`` is true) or just a
    relative path. Entries are shuffled and served one per ``sample`` call.
    """

    def __init__(self, config):
        self.image_home = config["StyleSampler"]["image_home"]
        label_file = config["StyleSampler"]["label_file"]
        self.dataset_with_label = config["StyleSampler"]["with_label"]
        self.height = config["Global"]["image_height"]
        self.index = 0
        self.path_label_list = self._read_lines(label_file)
        assert len(self.path_label_list) > 0, \
            "label file {} contains no entries".format(label_file)
        random.shuffle(self.path_label_list)

    @staticmethod
    def _read_lines(label_file):
        """Return the lines of ``label_file`` (decoded as UTF-8) as a list.

        Only the empty string produced by a trailing newline is dropped, so a
        final entry without a newline terminator is kept.
        """
        # Explicit encoding: labels may hold non-ASCII text and the platform
        # default encoding is not guaranteed to be UTF-8.
        with open(label_file, "r", encoding="utf-8") as f:
            lines = f.read().split("\n")
        if lines and lines[-1] == "":
            lines.pop()
        return lines

    def sample(self):
        """Return the next style image, resized to the configured height.

        Returns:
            ``{"image": image, "label": label}`` when the entry has a
            non-empty label, otherwise ``{"image": image}``.

        Raises:
            IOError: If the image file cannot be read by ``cv2.imread``.
        """
        if self.index >= len(self.path_label_list):
            random.shuffle(self.path_label_list)
            self.index = 0
        entry = self.path_label_list[self.index]
        if self.dataset_with_label:
            # Split on the first tab only so labels may themselves hold tabs.
            rel_image_path, label = entry.split('\t', 1)
        else:
            rel_image_path = entry
            label = None
        img_path = "{}/{}".format(self.image_home, rel_image_path)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("failed to read style image: {}".format(img_path))

        origin_height = image.shape[0]
        ratio = self.height / origin_height
        # Use the target height directly: int(origin_height * ratio) can land
        # one pixel short because of floating point rounding. The width is
        # kept at >= 1 so extremely narrow images still resize.
        width = max(1, int(image.shape[1] * ratio))
        height = int(self.height)
        image = cv2.resize(image, (width, height))

        self.index += 1
        if label:
            return {"image": image, "label": label}
        else:
            return {"image": image}
|
||||
|
||||
|
||||
def duplicate_image(image, width):
    """Tile a 3-D image along axis 1 and cut it to exactly ``width`` columns.

    Args:
        image: Array indexed as (rows, columns, channels).
        width: Number of columns in the returned image.

    Returns:
        The horizontally repeated image, cropped to ``width`` columns.
    """
    # One repeat more than the integer quotient always covers ``width``.
    repeats = width // image.shape[1] + 1
    tiled = np.tile(image, reps=[1, repeats, 1])
    return tiled[:, :width, :]
|
@ -0,0 +1,71 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
|
||||
from utils.config import ArgsParser, load_config, override_config
|
||||
from utils.logging import get_logger
|
||||
from engine import style_samplers, corpus_generators, text_drawers, predictors, writers
|
||||
|
||||
|
||||
class ImageSynthesiser(object):
    """Synthesizes a single styled text image from a corpus string.

    Construction parses the command line, loads the config (applying any
    overrides), creates the output directory, sets up a logger that writes to
    ``<output_dir>/predict.log`` and builds the text drawer and the predictor
    named by ``config["Predictor"]["method"]``.
    """

    def __init__(self):
        self.FLAGS = ArgsParser().parse_args()
        self.config = load_config(self.FLAGS.config)
        self.config = override_config(self.config, options=self.FLAGS.override)
        self.output_dir = self.config["Global"]["output_dir"]
        # makedirs(exist_ok=True) also creates missing parent directories and
        # avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self.output_dir, exist_ok=True)
        self.logger = get_logger(
            log_file='{}/predict.log'.format(self.output_dir))

        self.text_drawer = text_drawers.StdTextDrawer(self.config)

        predictor_method = self.config["Predictor"]["method"]
        assert predictor_method is not None
        self.predictor = getattr(predictors, predictor_method)(self.config)

    def synth_image(self, corpus, style_input, language="en"):
        """Render ``corpus`` as text and restyle it after ``style_input``.

        Returns:
            The result of ``self.predictor.predict`` for the style image and
            the drawn text image.
        """
        corpus, text_input = self.text_drawer.draw_text(corpus, language)
        synth_result = self.predictor.predict(style_input, text_input)
        return synth_result
|
||||
|
||||
|
||||
class DatasetSynthesiser(ImageSynthesiser):
    """Synthesizes a whole labelled dataset of styled text images.

    On top of the base class setup this builds the corpus generator and the
    style sampler named in the config, and a ``SimpleWriter`` that stores the
    generated images and labels under the command line ``tag``.
    """

    def __init__(self):
        super(DatasetSynthesiser, self).__init__()
        self.tag = self.FLAGS.tag
        self.output_num = self.config["Global"]["output_num"]
        corpus_generator_method = self.config["CorpusGenerator"]["method"]
        self.corpus_generator = getattr(corpus_generators,
                                        corpus_generator_method)(self.config)

        style_sampler_method = self.config["StyleSampler"]["method"]
        assert style_sampler_method is not None
        # Honour the configured sampler class, consistent with how the
        # predictor and corpus generator are resolved from the config.
        self.style_sampler = getattr(style_samplers,
                                     style_sampler_method)(self.config)
        self.writer = writers.SimpleWriter(self.config, self.tag)

    def synth_dataset(self):
        """Generate ``output_num`` images, then write and merge the labels."""
        for _ in range(self.output_num):
            style_data = self.style_sampler.sample()
            style_input = style_data["image"]
            corpus_language, text_input_label = self.corpus_generator.generate(
            )
            # draw_text may truncate the corpus; keep the returned label so
            # the saved label matches what was drawn.
            text_input_label, text_input = self.text_drawer.draw_text(
                text_input_label, corpus_language)

            synth_result = self.predictor.predict(style_input, text_input)
            fake_fusion = synth_result["fake_fusion"]
            self.writer.save_image(fake_fusion, text_input_label)
        self.writer.save_label()
        self.writer.merge_label()
|
@ -0,0 +1,57 @@
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
import numpy as np
|
||||
from utils.logging import get_logger
|
||||
|
||||
|
||||
class StdTextDrawer(object):
    """Renders corpus strings as black text on a grey background.

    One font per language is loaded from ``config["TextDrawer"]["fonts"]``
    at a size chosen so the rendered text fits the configured image height.
    """

    def __init__(self, config):
        self.logger = get_logger()
        self.max_width = config["Global"]["image_width"]
        self.char_list = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
        self.height = config["Global"]["image_height"]
        self.font_dict = {}
        self.load_fonts(config["TextDrawer"]["fonts"])
        self.support_languages = list(self.font_dict)

    def load_fonts(self, fonts_config):
        """Load one font per language of ``fonts_config`` into ``font_dict``."""
        for language in fonts_config:
            path = fonts_config[language]
            size = self.get_valid_height(path)
            self.font_dict[language] = ImageFont.truetype(path, size)

    def get_valid_height(self, font_path):
        """Return a font size whose rendered height fits ``height - 4``.

        The font is measured at size ``height - 4`` on ``self.char_list``; if
        the measured height exceeds that limit the size is scaled down
        proportionally.
        """
        limit = self.height - 4
        probe = ImageFont.truetype(font_path, limit)
        _, measured = probe.getsize(self.char_list)
        if measured <= limit:
            return limit
        return int(limit**2 / measured)

    def draw_text(self, corpus, language="en", crop=True):
        """Draw ``corpus`` character by character and return it as an image.

        Args:
            corpus: Text to render.
            language: Key of the font to use; falls back to ``"en"`` (with a
                warning) when the language has no font.
            crop: When true the canvas width is capped at ``max_width`` (+4).

        Returns:
            ``(corpus, image)`` where ``corpus`` is truncated to the
            characters drawn before the canvas was full and ``image`` is a
            uint8 array cut at the end of the last drawn character.
        """
        if language not in self.support_languages:
            self.logger.warning(
                "language {} not supported, use en instead.".format(language))
            language = "en"

        # Budget one square cell per character, plus a 2 pixel margin on
        # either side.
        wanted = len(corpus) * self.height
        width = (min(self.max_width, wanted) if crop else wanted) + 4
        canvas = Image.new("RGB", (width, self.height), color=(127, 127, 127))
        pen = ImageDraw.Draw(canvas)

        char_x = 2
        font = self.font_dict[language]
        for i, char_i in enumerate(corpus):
            advance = font.getsize(char_i)[0]
            pen.text((char_x, 2), char_i, fill=(0, 0, 0), font=font)
            char_x += advance
            if char_x >= width:
                # Canvas is full: keep only the characters drawn so far.
                corpus = corpus[0:i + 1]
                self.logger.warning("corpus length exceed limit: {}".format(
                    corpus))
                break

        pixels = np.array(canvas).astype(np.uint8)
        return corpus, pixels[:, 0:char_x, :]
|
@ -0,0 +1,71 @@
|
||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import cv2
|
||||
import glob
|
||||
|
||||
from utils.logging import get_logger
|
||||
|
||||
|
||||
class SimpleWriter(object):
    """Writes synthesized images and their labels below ``output_dir``.

    Images go to ``<output_dir>/images/<tag>/<counter>.png``, per-tag labels
    to ``<output_dir>/label/<tag>_label.txt`` and the merged labels of all
    tags to ``<output_dir>/label.txt``.
    """

    def __init__(self, config, tag):
        self.logger = get_logger()
        self.output_dir = config["Global"]["output_dir"]
        self.counter = 0
        self.label_dict = {}
        self.tag = tag
        self.label_file_index = 0

    def save_image(self, image, text_input_label):
        """Write ``image`` to disk and remember its label.

        The per-tag label file is rewritten after every 100th image.
        """
        image_home = os.path.join(self.output_dir, "images", self.tag)
        os.makedirs(image_home, exist_ok=True)

        image_path = os.path.join(image_home, "{}.png".format(self.counter))
        # TODO: support continuing a previous synthesis run.
        cv2.imwrite(image_path, image)
        self.logger.info("generate image: {}".format(image_path))

        image_name = os.path.join(self.tag, "{}.png".format(self.counter))
        self.label_dict[image_name] = text_input_label

        self.counter += 1
        if not self.counter % 100:
            self.save_label()

    def save_label(self):
        """Rewrite the per-tag label file with every label recorded so far."""
        label_home = os.path.join(self.output_dir, "label")
        # makedirs also creates ``output_dir`` itself when it is missing.
        os.makedirs(label_home, exist_ok=True)
        label_raw = "".join(
            "{}\t{}\n".format(image_path, label)
            for image_path, label in self.label_dict.items())
        label_file_path = os.path.join(label_home,
                                       "{}_label.txt".format(self.tag))
        # Explicit encoding: labels may hold non-ASCII text and the platform
        # default encoding is not guaranteed to be UTF-8.
        with open(label_file_path, "w", encoding="utf-8") as f:
            f.write(label_raw)
        self.label_file_index += 1

    def merge_label(self):
        """Concatenate every ``*_label.txt`` file into ``label.txt``."""
        label_file_regex = os.path.join(self.output_dir, "label",
                                        "*_label.txt")
        # Sorted so the merged file does not depend on directory order.
        label_file_list = sorted(glob.glob(label_file_regex))
        parts = []
        for label_file_i in label_file_list:
            with open(label_file_i, "r", encoding="utf-8") as f:
                parts.append(f.read())
        label_file_path = os.path.join(self.output_dir, "label.txt")
        with open(label_file_path, "w", encoding="utf-8") as f:
            f.write("".join(parts))
|
@ -0,0 +1,2 @@
|
||||
PaddleOCR
|
||||
飞桨文字识别
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue