Merge pull request #2137 from cxysteven/vae
Add a Variational Autoencoder (VAE) demo written in the V1 API.
commit 018181fb0c
@@ -0,0 +1,13 @@
# Variational Autoencoder (VAE)

This demo implements the VAE training procedure described in the original paper, "Auto-Encoding Variational Bayes" (https://arxiv.org/abs/1312.6114).
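
Training minimizes the negative evidence lower bound (ELBO) from that paper; a sketch of the objective, which corresponds to the `reconstruct_error + KL_loss` output built in vae_conf.py:

$$\mathcal{L}(x) = -\mathbb{E}_{z \sim q(z|x)}\left[\log p(x|z)\right] + D_{KL}\big(q(z|x)\,\|\,\mathcal{N}(0, I)\big)$$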

To run the model, first download the MNIST dataset by running the shell script in ./data.

Then run the command below. The flag --use_gpu specifies whether to use the GPU for training (0 for CPU, 1 for GPU).

$ python vae_train.py [--use_gpu 1]
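
For example, to train on the CPU instead:

$ python vae_train.py --use_gpu 0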

The generated images will be stored in ./samples/
The corresponding models will be stored in ./params/

@@ -0,0 +1,17 @@
#!/usr/bin/env sh
# This script downloads the MNIST data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"

echo "Downloading..."

for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    if [ ! -e "$fname" ]; then
        wget --no-check-certificate "http://yann.lecun.com/exdb/mnist/${fname}.gz"
        gunzip "${fname}.gz"
    fi
done

@@ -0,0 +1,60 @@ dataloader.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class MNISTloader(object):
    def __init__(self,
                 data_path="./data/mnist_data/",
                 batch_size=60,
                 process='train'):
        self.batch_size = batch_size
        self.data_path = data_path
        self._pointer = 0
        self.image_batches = np.array([])
        self.process = process

    def _extract_images(self, filename, n):
        # Skip the 16-byte IDX header (magic number, image count, rows, cols).
        with open(filename, 'rb') as f:
            f.read(16)
            data = np.fromfile(
                f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
        # Map pixel values into [0, 1]; the sigmoid decoder and the binary
        # cross-entropy reconstruction loss both expect targets in this range.
        data = data / 255.
        data_batches = np.split(data, n / self.batch_size, 0)
        return data_batches

    @property
    def pointer(self):
        return self._pointer

    def load_data(self):
        TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
        TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path

        if self.process == 'train':
            self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
        else:
            self.image_batches = self._extract_images(TEST_IMAGES, 10000)

    def next_batch(self):
        batch = self.image_batches[self._pointer]
        # Wrap around based on the actual number of batches, so the loader
        # also works for the 10k-image test set.
        self._pointer = (self._pointer + 1) % len(self.image_batches)
        return np.array(batch)

    def reset_pointer(self):
        self._pointer = 0
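
# A minimal usage sketch (not part of the original demo): iterate one epoch
# of training batches with the loader above. Shapes assume MNIST (28x28).
#
#   mnist = MNISTloader(batch_size=60, process='train')
#   mnist.load_data()
#   mnist.reset_pointer()
#   first = True
#   while mnist.pointer != 0 or first:
#       X = mnist.next_batch().astype('float32')  # shape: (60, 784)
#       first = False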
Binary file not shown.

@@ -0,0 +1,116 @@ vae_conf.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *
import numpy as np

is_generating = get_config_arg("is_generating", bool, False)

settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())

X_dim = 28 * 28
h_dim = 128
z_dim = 100


def reparameterization(mu, logvar):
    # eps plays the role of the noise in the reparameterization trick; here
    # it is a parameter initialized from N(0, 1).
    eps = ParamAttr(initial_mean=0., initial_std=1.)
    with mixed_layer() as sigma:
        # std = exp(0.5 * logvar), multiplied elementwise by eps.
        sigma += dotmul_projection(layer_math.exp(logvar * 0.5), param_attr=eps)
    return mu + sigma
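
# The reparameterization trick rewrites sampling z ~ N(mu, sigma^2) as a
# deterministic function of the parameters plus external noise,
#
#     z = mu + sigma * eps,    eps ~ N(0, I),    sigma = exp(logvar / 2),
#
# so gradients can flow through mu and logvar. Note that this V1-API demo
# keeps eps as a parameter initialized from N(0, 1) rather than resampling
# fresh noise every batch, which is a simplification of the trick as
# described in the paper.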


def q_func(X):
    """
    Encoder: a shared hidden layer followed by two linear heads producing
    the posterior mean and log-variance. Weights use Xavier initialization.
    """
    param_attr = ParamAttr(
        name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
    mu_param = ParamAttr(
        name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    logvar_param = ParamAttr(
        name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))

    bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
    mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
    logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)

    share_layer = fc_layer(
        X,
        size=h_dim,
        param_attr=param_attr,
        bias_attr=bias_attr,
        act=ReluActivation())

    mu = fc_layer(
        share_layer,
        size=z_dim,
        param_attr=mu_param,
        bias_attr=mu_bias,
        act=LinearActivation())
    logvar = fc_layer(
        share_layer,
        size=z_dim,
        param_attr=logvar_param,
        bias_attr=logvar_bias,
        act=LinearActivation())
    return mu, logvar


def generator(z):
    """
    Decoder: maps a latent code z to per-pixel Bernoulli probabilities.
    """
    hidden_param = ParamAttr(
        name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
    hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
    prob_param = ParamAttr(
        name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)

    hidden_layer = fc_layer(
        z,
        size=h_dim,
        act=ReluActivation(),
        param_attr=hidden_param,
        bias_attr=hidden_bias)
    prob = fc_layer(
        hidden_layer,
        size=X_dim,
        act=SigmoidActivation(),
        param_attr=prob_param,
        bias_attr=prob_bias)

    return prob
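
# Because the decoder's sigmoid output is read as per-pixel Bernoulli
# probabilities, the reconstruction term below is the binary cross-entropy
# between those probabilities and the input pixels (which should therefore
# lie in [0, 1]).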


def reconstruct_error(prob, X):
    cost = multi_binary_label_cross_entropy(input=prob, label=X)
    return cost


def KL_loss(mu, logvar):
    # mu_square = mu * mu (elementwise)
    with mixed_layer() as mu_square:
        mu_square += dotmul_operator(mu, mu, scale=1.)

    cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)

    return cost
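
# This is the closed-form KL divergence between the diagonal Gaussian
# posterior and the standard normal prior (Appendix B of the paper):
#
#     KL(N(mu, sigma^2) || N(0, I))
#         = 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar)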


if not is_generating:
    # Training network: encode, sample via reparameterization, decode, and
    # minimize reconstruction error plus the KL regularizer.
    x_batch = data_layer(name='x_batch', size=X_dim)
    mu, logvar = q_func(x_batch)
    z_samples = reparameterization(mu, logvar)
    prob = generator(z_samples)
    outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
else:
    # Generation network: decode externally supplied noise.
    z_samples = data_layer(name='noise', size=z_dim)
    outputs(generator(z_samples))

@@ -0,0 +1,175 @@ vae_train.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import numpy as np

from paddle.trainer.config_parser import parse_config
import py_paddle.swig_paddle as api
import dataloader
import matplotlib
# Use a non-interactive backend so training also works on headless machines.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# gridspec is used by plot_samples() below but was missing from the imports.
import matplotlib.gridspec as gridspec


def plot_samples(samples):
    # Tile the samples into a 4x4 grid of 28x28 grayscale images.
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        plt.subplot(gs[i])
        plt.axis('off')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig


def CHECK_EQ(a, b):
    assert a == b, "a=%s, b=%s" % (a, b)


def get_fake_samples(generator_machine, batch_size, noise):
    # Run one forward pass of the generation network on the given noise.
    gen_inputs = api.Arguments.createArguments(1)
    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
    gen_outputs = api.Arguments.createArguments(0)
    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
    return fake_samples


def copy_shared_parameters(src, dst):
    '''
    Copy the parameters from src to dst, matching them by name.

    :param src: the source of the parameters
    :type src: GradientMachine
    :param dst: the destination of the parameters
    :type dst: GradientMachine
    '''
    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
    src_params = dict([(p.getName(), p) for p in src_params])

    for i in xrange(dst.getParameterSize()):
        dst_param = dst.getParameter(i)
        src_param = src_params.get(dst_param.getName(), None)
        if src_param is None:
            continue
        src_value = src_param.getBuf(api.PARAMETER_VALUE)
        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
        CHECK_EQ(len(src_value), len(dst_value))
        dst_value.copyFrom(src_value)
        dst_param.setValueUpdated()


def find(iterable, cond):
    for item in iterable:
        if cond(item):
            return item
    return None


def get_layer_size(model_conf, layer_name):
    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
    return layer_conf.size


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use_gpu", default="1", help="1 means use gpu for training")
    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
    args = parser.parse_args()
    use_gpu = args.use_gpu
    assert use_gpu in ["0", "1"]

    if not os.path.exists("./samples/"):
        os.makedirs("./samples/")

    if not os.path.exists("./params/"):
        os.makedirs("./params/")

    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
                   '--save_dir=' + "./params/")

    conf = "vae_conf.py"

    # Parse the same config twice: once as the training network and once as
    # the generation network (see the is_generating branch in vae_conf.py).
    trainer_conf = parse_config(conf, "is_generating=False")
    gener_conf = parse_config(conf, "is_generating=True")

    batch_size = trainer_conf.opt_config.batch_size

    noise_dim = get_layer_size(gener_conf.model_config, "noise")

    mnist = dataloader.MNISTloader(batch_size=batch_size)
    mnist.load_data()

    training_machine = api.GradientMachine.createFromConfigProto(
        trainer_conf.model_config)

    generator_machine = api.GradientMachine.createFromConfigProto(
        gener_conf.model_config)

    trainer = api.Trainer.create(trainer_conf, training_machine)

    trainer.startTrain()

    for train_pass in xrange(100):
        trainer.startTrainPass()
        mnist.reset_pointer()
        i = 0
        it = 0
        while mnist.pointer != 0 or i == 0:
            X = mnist.next_batch().astype('float32')

            inputs = api.Arguments.createArguments(1)
            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))

            trainer.trainOneDataBatch(batch_size, inputs)

            if it % 1000 == 0:
                outputs = api.Arguments.createArguments(0)
                training_machine.forward(inputs, outputs, api.PASS_TEST)
                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
                print "\niter: {}".format(str(it).zfill(3))
                print "VAE loss: {}".format(str(loss).zfill(3))

                # Sync the shared parameters from the training network into
                # the generation network before sampling.
                copy_shared_parameters(training_machine, generator_machine)

                z_samples = np.random.randn(batch_size,
                                            noise_dim).astype('float32')
                samples = get_fake_samples(generator_machine, batch_size,
                                           z_samples)

                # Plot the first 16 generated images as one figure.
                figure = plot_samples(samples[:16])
                plt.savefig(
                    "./samples/{}_{}.png".format(
                        str(train_pass).zfill(3), str(i).zfill(3)),
                    bbox_inches='tight')
                plt.close(figure)
            i += 1
            it += 1

        trainer.finishTrainPass()
    trainer.finishTrain()


if __name__ == '__main__':
    main()