Merge pull request #2137 from cxysteven/vae
Add a Variational Autoencoder (VAE) demo written in the V1 API.
commit 018181fb0c
@@ -0,0 +1,13 @@
# Variational Autoencoder (VAE)

This demo implements the VAE training procedure described in the original paper, "Auto-Encoding Variational Bayes" (https://arxiv.org/abs/1312.6114).
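
Training minimizes the negative evidence lower bound (ELBO) from that paper; a sketch of the objective, which corresponds to the `reconstruct_error + KL_loss` output built in vae_conf.py:

$$\mathcal{L}(x) = -\mathbb{E}_{z \sim q(z|x)}\left[\log p(x|z)\right] + D_{KL}\big(q(z|x)\,\|\,\mathcal{N}(0, I)\big)$$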

To run the model, first download the MNIST dataset by running the shell script in ./data.

Then run the command below. The flag --use_gpu specifies whether to use the GPU for training (0 for CPU, 1 for GPU).

$ python vae_train.py [--use_gpu 1]
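
For example, to train on the CPU instead:

$ python vae_train.py --use_gpu 0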

The generated images will be stored in ./samples/
The corresponding models will be stored in ./params/

@@ -0,0 +1,17 @@
#!/usr/bin/env sh
# This script downloads the MNIST data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/mnist_data"
mkdir "$DIR/mnist_data"
cd "$DIR/mnist_data"

echo "Downloading..."

for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
    if [ ! -e "$fname" ]; then
        wget --no-check-certificate "http://yann.lecun.com/exdb/mnist/${fname}.gz"
        gunzip "${fname}.gz"
    fi
done

@@ -0,0 +1,60 @@ dataloader.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np


class MNISTloader(object):
    def __init__(self,
                 data_path="./data/mnist_data/",
                 batch_size=60,
                 process='train'):
        self.batch_size = batch_size
        self.data_path = data_path
        self._pointer = 0
        self.image_batches = np.array([])
        self.process = process

    def _extract_images(self, filename, n):
        # Skip the 16-byte IDX header (magic number, image count, rows, cols).
        with open(filename, 'rb') as f:
            f.read(16)
            data = np.fromfile(
                f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28))
        # Map pixel values into [0, 1]; the sigmoid decoder and the binary
        # cross-entropy reconstruction loss both expect targets in this range.
        data = data / 255.
        data_batches = np.split(data, n / self.batch_size, 0)
        return data_batches

    @property
    def pointer(self):
        return self._pointer

    def load_data(self):
        TRAIN_IMAGES = '%s/train-images-idx3-ubyte' % self.data_path
        TEST_IMAGES = '%s/t10k-images-idx3-ubyte' % self.data_path

        if self.process == 'train':
            self.image_batches = self._extract_images(TRAIN_IMAGES, 60000)
        else:
            self.image_batches = self._extract_images(TEST_IMAGES, 10000)

    def next_batch(self):
        batch = self.image_batches[self._pointer]
        # Wrap around based on the actual number of batches, so the loader
        # also works for the 10k-image test set.
        self._pointer = (self._pointer + 1) % len(self.image_batches)
        return np.array(batch)

    def reset_pointer(self):
        self._pointer = 0
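
# A minimal usage sketch (not part of the original demo): iterate one epoch
# of training batches with the loader above. Shapes assume MNIST (28x28).
#
#   mnist = MNISTloader(batch_size=60, process='train')
#   mnist.load_data()
#   mnist.reset_pointer()
#   first = True
#   while mnist.pointer != 0 or first:
#       X = mnist.next_batch().astype('float32')  # shape: (60, 784)
#       first = False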
Binary file not shown.

@@ -0,0 +1,116 @@ vae_conf.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *
import numpy as np

is_generating = get_config_arg("is_generating", bool, False)

settings(batch_size=32, learning_rate=1e-3, learning_method=AdamOptimizer())

X_dim = 28 * 28
h_dim = 128
z_dim = 100


def reparameterization(mu, logvar):
    # eps plays the role of the noise in the reparameterization trick; here
    # it is a parameter initialized from N(0, 1).
    eps = ParamAttr(initial_mean=0., initial_std=1.)
    with mixed_layer() as sigma:
        # std = exp(0.5 * logvar), multiplied elementwise by eps.
        sigma += dotmul_projection(layer_math.exp(logvar * 0.5), param_attr=eps)
    return mu + sigma
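
# The reparameterization trick rewrites sampling z ~ N(mu, sigma^2) as a
# deterministic function of the parameters plus external noise,
#
#     z = mu + sigma * eps,    eps ~ N(0, I),    sigma = exp(logvar / 2),
#
# so gradients can flow through mu and logvar. Note that this V1-API demo
# keeps eps as a parameter initialized from N(0, 1) rather than resampling
# fresh noise every batch, which is a simplification of the trick as
# described in the paper.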


def q_func(X):
    """
    Encoder: a shared hidden layer followed by two linear heads producing
    the posterior mean and log-variance. Weights use Xavier initialization.
    """
    param_attr = ParamAttr(
        name='share.w', initial_mean=0., initial_std=1. / np.sqrt(X_dim / 2.))
    mu_param = ParamAttr(
        name='mu.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    logvar_param = ParamAttr(
        name='logvar.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))

    bias_attr = ParamAttr(name='share.bias', initial_mean=0., initial_std=0.)
    mu_bias = ParamAttr(name='mu.bias', initial_mean=0., initial_std=0.)
    logvar_bias = ParamAttr(name='logvar.bias', initial_mean=0., initial_std=0.)

    share_layer = fc_layer(
        X,
        size=h_dim,
        param_attr=param_attr,
        bias_attr=bias_attr,
        act=ReluActivation())

    mu = fc_layer(
        share_layer,
        size=z_dim,
        param_attr=mu_param,
        bias_attr=mu_bias,
        act=LinearActivation())
    logvar = fc_layer(
        share_layer,
        size=z_dim,
        param_attr=logvar_param,
        bias_attr=logvar_bias,
        act=LinearActivation())
    return mu, logvar


def generator(z):
    """
    Decoder: maps a latent code z to per-pixel Bernoulli probabilities.
    """
    hidden_param = ParamAttr(
        name='hidden.w', initial_mean=0., initial_std=1. / np.sqrt(z_dim / 2.))
    hidden_bias = ParamAttr(name='hidden.bias', initial_mean=0., initial_std=0.)
    prob_param = ParamAttr(
        name='prob.w', initial_mean=0., initial_std=1. / np.sqrt(h_dim / 2.))
    prob_bias = ParamAttr(name='prob.bias', initial_mean=0., initial_std=0.)

    hidden_layer = fc_layer(
        z,
        size=h_dim,
        act=ReluActivation(),
        param_attr=hidden_param,
        bias_attr=hidden_bias)
    prob = fc_layer(
        hidden_layer,
        size=X_dim,
        act=SigmoidActivation(),
        param_attr=prob_param,
        bias_attr=prob_bias)

    return prob
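
# Because the decoder's sigmoid output is read as per-pixel Bernoulli
# probabilities, the reconstruction term below is the binary cross-entropy
# between those probabilities and the input pixels (which should therefore
# lie in [0, 1]).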


def reconstruct_error(prob, X):
    cost = multi_binary_label_cross_entropy(input=prob, label=X)
    return cost


def KL_loss(mu, logvar):
    # mu_square = mu * mu (elementwise)
    with mixed_layer() as mu_square:
        mu_square += dotmul_operator(mu, mu, scale=1.)

    cost = 0.5 * sum_cost(layer_math.exp(logvar) + mu_square - 1. - logvar)

    return cost
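
# This is the closed-form KL divergence between the diagonal Gaussian
# posterior and the standard normal prior (Appendix B of the paper):
#
#     KL(N(mu, sigma^2) || N(0, I))
#         = 0.5 * sum(exp(logvar) + mu^2 - 1 - logvar)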


if not is_generating:
    # Training network: encode, sample via reparameterization, decode, and
    # minimize reconstruction error plus the KL regularizer.
    x_batch = data_layer(name='x_batch', size=X_dim)
    mu, logvar = q_func(x_batch)
    z_samples = reparameterization(mu, logvar)
    prob = generator(z_samples)
    outputs(reconstruct_error(prob, x_batch) + KL_loss(mu, logvar))
else:
    # Generation network: decode externally supplied noise.
    z_samples = data_layer(name='noise', size=z_dim)
    outputs(generator(z_samples))

@@ -0,0 +1,175 @@ vae_train.py
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import numpy as np

from paddle.trainer.config_parser import parse_config
import py_paddle.swig_paddle as api
import dataloader
import matplotlib
# Use a non-interactive backend so training also works on headless machines.
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# gridspec is used by plot_samples() below but was missing from the imports.
import matplotlib.gridspec as gridspec


def plot_samples(samples):
    # Tile the samples into a 4x4 grid of 28x28 grayscale images.
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)
    for i, sample in enumerate(samples):
        plt.subplot(gs[i])
        plt.axis('off')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig


def CHECK_EQ(a, b):
    assert a == b, "a=%s, b=%s" % (a, b)


def get_fake_samples(generator_machine, batch_size, noise):
    # Run one forward pass of the generation network on the given noise.
    gen_inputs = api.Arguments.createArguments(1)
    gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
    gen_outputs = api.Arguments.createArguments(0)
    generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
    fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
    return fake_samples


def copy_shared_parameters(src, dst):
    '''
    Copy the parameters from src to dst, matching them by name.

    :param src: the source of the parameters
    :type src: GradientMachine
    :param dst: the destination of the parameters
    :type dst: GradientMachine
    '''
    src_params = [src.getParameter(i) for i in xrange(src.getParameterSize())]
    src_params = dict([(p.getName(), p) for p in src_params])

    for i in xrange(dst.getParameterSize()):
        dst_param = dst.getParameter(i)
        src_param = src_params.get(dst_param.getName(), None)
        if src_param is None:
            continue
        src_value = src_param.getBuf(api.PARAMETER_VALUE)
        dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
        CHECK_EQ(len(src_value), len(dst_value))
        dst_value.copyFrom(src_value)
        dst_param.setValueUpdated()


def find(iterable, cond):
    for item in iterable:
        if cond(item):
            return item
    return None


def get_layer_size(model_conf, layer_name):
    layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
    assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
    return layer_conf.size


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--use_gpu", default="1", help="1 means use gpu for training")
    parser.add_argument("--gpu_id", default="0", help="the gpu_id parameter")
    args = parser.parse_args()
    use_gpu = args.use_gpu
    assert use_gpu in ["0", "1"]

    if not os.path.exists("./samples/"):
        os.makedirs("./samples/")

    if not os.path.exists("./params/"):
        os.makedirs("./params/")

    api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
                   '--log_period=1000', '--gpu_id=' + args.gpu_id,
                   '--save_dir=' + "./params/")

    conf = "vae_conf.py"

    # Parse the same config twice: once as the training network and once as
    # the generation network (see the is_generating branch in vae_conf.py).
    trainer_conf = parse_config(conf, "is_generating=False")
    gener_conf = parse_config(conf, "is_generating=True")

    batch_size = trainer_conf.opt_config.batch_size

    noise_dim = get_layer_size(gener_conf.model_config, "noise")

    mnist = dataloader.MNISTloader(batch_size=batch_size)
    mnist.load_data()

    training_machine = api.GradientMachine.createFromConfigProto(
        trainer_conf.model_config)

    generator_machine = api.GradientMachine.createFromConfigProto(
        gener_conf.model_config)

    trainer = api.Trainer.create(trainer_conf, training_machine)

    trainer.startTrain()

    for train_pass in xrange(100):
        trainer.startTrainPass()
        mnist.reset_pointer()
        i = 0
        it = 0
        while mnist.pointer != 0 or i == 0:
            X = mnist.next_batch().astype('float32')

            inputs = api.Arguments.createArguments(1)
            inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(X))

            trainer.trainOneDataBatch(batch_size, inputs)

            if it % 1000 == 0:
                outputs = api.Arguments.createArguments(0)
                training_machine.forward(inputs, outputs, api.PASS_TEST)
                loss = np.mean(outputs.getSlotValue(0).copyToNumpyMat())
                print "\niter: {}".format(str(it).zfill(3))
                print "VAE loss: {}".format(str(loss).zfill(3))

                # Sync the shared parameters from the training network into
                # the generation network before sampling.
                copy_shared_parameters(training_machine, generator_machine)

                z_samples = np.random.randn(batch_size,
                                            noise_dim).astype('float32')
                samples = get_fake_samples(generator_machine, batch_size,
                                           z_samples)

                # Plot the first 16 generated images as one figure.
                figure = plot_samples(samples[:16])
                plt.savefig(
                    "./samples/{}_{}.png".format(
                        str(train_pass).zfill(3), str(i).zfill(3)),
                    bbox_inches='tight')
                plt.close(figure)
            i += 1
            it += 1

        trainer.finishTrainPass()
    trainer.finishTrain()


if __name__ == '__main__':
    main()