Merge pull request #2137 from cxysteven/vae

Add a Variational Autoencoder (VAE) demo written in the V1 API.

commit 018181fb0c

@@ -0,0 +1,13 @@
# Variational Autoencoder (VAE)

This demo implements the VAE training procedure described in the original paper, "Auto-Encoding Variational Bayes" (https://arxiv.org/abs/1312.6114).

To run the model, first download the MNIST dataset by running the shell script in ./data.

Then you can run the command below. The flag --use_gpu specifies whether to use the GPU for training (0 is CPU, 1 is GPU).

$ python vae_train.py [--use_gpu 1]

The generated images will be stored in ./samples/

The corresponding models will be stored in ./params/
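For reference, the cost the demo minimizes (built in vae_conf.py below from reconstruct_error() and KL_loss()) is the negative evidence lower bound from the paper:

$$
\mathcal{L}(\theta, \phi; x) = -\mathbb{E}_{q_\phi(z \mid x)}\left[\log p_\theta(x \mid z)\right] + D_{\mathrm{KL}}\left(q_\phi(z \mid x) \,\|\, \mathcal{N}(0, I)\right)
$$

and, for the diagonal Gaussian posterior used here, the KL term has the closed form computed by KL_loss():

$$
D_{\mathrm{KL}} = \frac{1}{2} \sum_{j} \left( e^{\log \sigma_j^2} + \mu_j^2 - 1 - \log \sigma_j^2 \right).
$$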
@@ -0,0 +1,17 @@
#!/usr/bin/env sh
# This script downloads the MNIST data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"

echo "Downloading..."

for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    if [ ! -e "$fname" ]; then
        wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
        gunzip "${fname}.gz"
    fi
done
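Assuming the script above is saved as ./data/get_mnist_data.sh (the diff does not show its filename), the README's download step amounts to:

$ sh ./data/get_mnist_data.sh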
@@ -0,0 +1,60 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class MNISTloader():
    def __init__(self,
                 data_path="./data/mnist_data/",
                 batch_size=60,
                 process='train'):
        self.batch_size = batch_size
        self.data_path = data_path
        self._pointer = 0
        self.image_batches = np.array([])
        self.process = process

    def _extract_images(self, filename, n):
        f = open(filename, 'rb')
        # Skip the 16-byte IDX header (magic number, image count, rows, cols).
        f.read(16)
        data = np.fromfile(f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
        # Map the pixel values into [-1, 1].
        data = data / 255. * 2. - 1
        # Split the n images into equal batches of batch_size, so batch_size
        # must divide n evenly.
        data_batches = np.split(data, n / self.batch_size, 0)

        f.close()

        return data_batches

    @property
    def pointer(self):
        return self._pointer

    def load_data(self):
        TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
        TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path

        if self.process == 'train':
            self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
        else:
            self.image_batches = self._extract_images(TEST_IMAGES, 10000)

    def next_batch(self):
        batch = self.image_batches[self._pointer]
        # Advance the cursor and wrap around the actual number of batches.
        self._pointer = (self._pointer + 1) % len(self.image_batches)
        return np.array(batch)

    def reset_pointer(self):
        self._pointer = 0
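A minimal usage sketch of the loader above (illustrative, not part of the PR); the shapes follow from the code:

# Usage sketch for MNISTloader; illustrative only.
import dataloader

mnist = dataloader.MNISTloader(batch_size=60, process='train')
mnist.load_data()                  # 60000 images -> 1000 batches of 60
batch = mnist.next_batch()         # numpy array of shape (60, 784) in [-1, 1]
assert batch.shape == (60, 28 * 28)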
Binary file not shown.
@@ -0,0 +1,116 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *
import numpy as np

is_generating = get_config_arg("is_generating", bool, False)

settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())

X_dim = 28 * 28
h_dim = 128
z_dim = 100


def reparameterization(mu, logvar):
    # z = mu + sigma * eps, where sigma = exp(0.5 * logvar) and eps is
    # realized as the dotmul projection's parameter, initialized from N(0, 1).
    eps = ParamAttr(initial_mean=0., initial_std=1.)
    with mixed_layer() as sigma:
        sigma += dotmul_projection(layer_math.exp(logvar * 0.5), param_attr=eps)
    return mu + sigma


def q_func(X):
    """
    Encoder q(z|x): a shared hidden layer with two linear heads producing
    mu and logvar. Weights use Xavier-style initialization.
    """
    param_attr = ParamAttr(
        name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
    mu_param = ParamAttr(
        name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    logvar_param = ParamAttr(
        name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))

    bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
    mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
    logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)

    share_layer = fc_layer(
        X,
        size=h_dim,
        param_attr=param_attr,
        bias_attr=bias_attr,
        act=ReluActivation())

    return (fc_layer(
        share_layer,
        size=z_dim,
        param_attr=mu_param,
        bias_attr=mu_bias,
        act=LinearActivation()), fc_layer(
            share_layer,
            size=z_dim,
            param_attr=logvar_param,
            bias_attr=logvar_bias,
            act=LinearActivation()))


def generator(z):
    # Decoder p(x|z): one hidden ReLU layer followed by a sigmoid output
    # over the 784 pixels.
    hidden_param = ParamAttr(
        name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
    hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
    prob_param = ParamAttr(
        name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)

    hidden_layer = fc_layer(
        z,
        size=h_dim,
        act=ReluActivation(),
        param_attr=hidden_param,
        bias_attr=hidden_bias)
    prob = fc_layer(
        hidden_layer,
        size=X_dim,
        act=SigmoidActivation(),
        param_attr=prob_param,
        bias_attr=prob_bias)

    return prob


def reconstruct_error(prob, X):
    # Pixel-wise binary cross-entropy between the reconstruction and the input.
    cost = multi_binary_label_cross_entropy(input=prob, label=X)
    return cost


def KL_loss(mu, logvar):
    # Closed-form KL(q(z|x) || N(0, I)) for a diagonal Gaussian posterior:
    # 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar).
    with mixed_layer() as mu_square:
        mu_square += dotmul_operator(mu, mu, scale=1.)

    cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)

    return cost


if not is_generating:
    # Training network: encode, sample z, decode, minimize the negative ELBO.
    x_batch = data_layer(name='x_batch', size=X_dim)
    mu, logvar = q_func(x_batch)
    z_samples = reparameterization(mu, logvar)
    prob = generator(z_samples)
    outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
else:
    # Generation network: decode externally supplied noise.
    z_samples = data_layer(name='noise', size=z_dim)
    outputs(generator(z_samples))
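A minimal NumPy sketch (plain NumPy, not part of the PR) of what reparameterization() and KL_loss() above compute for one batch; the mu/logvar values are stand-ins for the encoder outputs:

# NumPy sketch of the reparameterization trick and the closed-form KL term;
# illustrative only, mirrors reparameterization() and KL_loss() above.
import numpy as np

batch_size, z_dim = 32, 100
mu = np.random.randn(batch_size, z_dim) * 0.1      # stand-in encoder mean
logvar = np.random.randn(batch_size, z_dim) * 0.1  # stand-in encoder log-variance

eps = np.random.randn(batch_size, z_dim)           # eps ~ N(0, I)
z = mu + np.exp(0.5 * logvar) * eps                # z = mu + sigma * eps

# KL(q(z|x) || N(0, I)) summed over latent dimensions, one value per sample.
kl = 0.5 * np.sum(np.exp(logvar) + mu ** 2 - 1. - logvar, axis=1)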
@@ -0,0 +1,175 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os

import numpy as np

from paddle.trainer.config_parser import parse_config
import py_paddle.swig_paddle as api
import dataloader
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec


def plot_samples(samples):
    # Arrange the samples on a 4x4 grid of 28x28 grayscale images.
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        plt.subplot(gs[i])
        plt.axis('off')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig


def CHECK_EQ(a, b):
    assert a == b, "a=%s, b=%s" % (a, b)


def get_fake_samples(generator_machine, batch_size, noise):
    # Run the generator network forward on a batch of noise vectors.
    gen_inputs = api.Arguments.createArguments(1)
    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
    gen_outputs = api.Arguments.createArguments(0)
    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
    return fake_samples


def copy_shared_parameters(src, dst):
    '''
    Copy the parameters from src to dst.
    :param src: the source of the parameters
    :type src: GradientMachine
    :param dst: the destination of the parameters
    :type dst: GradientMachine
    '''
    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
    src_params = dict([(p.getName(), p) for p in src_params])

    for i in xrange(dst.getParameterSize()):
        dst_param = dst.getParameter(i)
        src_param = src_params.get(dst_param.getName(), None)
        if src_param is None:
            continue
        src_value = src_param.getBuf(api.PARAMETER_VALUE)
        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
        CHECK_EQ(len(src_value), len(dst_value))
        dst_value.copyFrom(src_value)
        dst_param.setValueUpdated()


def find(iterable, cond):
    for item in iterable:
        if cond(item):
            return item
    return None


def get_layer_size(model_conf, layer_name):
    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
    return layer_conf.size


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use_gpu", default="1", help="1 means use gpu for training")
    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
    args = parser.parse_args()
    use_gpu = args.use_gpu
    assert use_gpu in ["0", "1"]

    if not os.path.exists("./samples/"):
        os.makedirs("./samples/")

    if not os.path.exists("./params/"):
        os.makedirs("./params/")

    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
                   '--save_dir=' + "./params/")

    conf = "vae_conf.py"

    # Parse the same config twice: once for the training network and once
    # for the generation network.
    trainer_conf = parse_config(conf, "is_generating=False")
    gener_conf = parse_config(conf, "is_generating=True")

    batch_size = trainer_conf.opt_config.batch_size

    noise_dim = get_layer_size(gener_conf.model_config, "noise")

    mnist = dataloader.MNISTloader(batch_size=batch_size)
    mnist.load_data()

    training_machine = api.GradientMachine.createFromConfigProto(
        trainer_conf.model_config)

    generator_machine = api.GradientMachine.createFromConfigProto(
        gener_conf.model_config)

    trainer = api.Trainer.create(trainer_conf, training_machine)

    trainer.startTrain()

    for train_pass in xrange(100):
        trainer.startTrainPass()
        mnist.reset_pointer()
        i = 0
        it = 0
        while mnist.pointer != 0 or i == 0:
            X = mnist.next_batch().astype('float32')

            inputs = api.Arguments.createArguments(1)
            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))

            trainer.trainOneDataBatch(batch_size, inputs)

            if it % 1000 == 0:

                outputs = api.Arguments.createArguments(0)
                training_machine.forward(inputs, outputs, api.PASS_TEST)
                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
                print "\niter: {}".format(str(it).zfill(3))
                print "VAE loss: {}".format(str(loss).zfill(3))

                # Sync the shared parameters from the training network into
                # the generator network (GradientMachine) before sampling.
                copy_shared_parameters(training_machine, generator_machine)

                z_samples = np.random.randn(batch_size,
                                            noise_dim).astype('float32')
                samples = get_fake_samples(generator_machine, batch_size,
                                           z_samples)

                # Plot the first 16 generated images as one picture.
                figure = plot_samples(samples[:16])
                plt.savefig(
                    "./samples/{}_{}.png".format(
                        str(train_pass).zfill(3), str(i).zfill(3)),
                    bbox_inches='tight')
                plt.close(figure)
                i += 1
            it += 1

        trainer.finishTrainPass()
    trainer.finishTrain()


if __name__ == '__main__':
    main()
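A hypothetical extension, not part of the PR: the same Arguments/forward pattern used in main() can evaluate the mean loss on the 10000-image MNIST test set. The helper name eval_test_loss is illustrative; the batch size must divide 10000 evenly because _extract_images() splits into equal batches.

# Hypothetical helper, illustrative only: mean VAE loss on the MNIST test set,
# reusing the patterns from main() above.
def eval_test_loss(training_machine, test_batch_size=100):
    test = dataloader.MNISTloader(batch_size=test_batch_size, process='test')
    test.load_data()
    losses = []
    for _ in xrange(len(test.image_batches)):
        X = test.next_batch().astype('float32')
        inputs = api.Arguments.createArguments(1)
        inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))
        outputs = api.Arguments.createArguments(0)
        training_machine.forward(inputs, outputs, api.PASS_TEST)
        losses.append(np.mean(outputs.getSlotValue(0).copyToNumpyMat()))
    return np.mean(losses)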