commit e92fe9e64f
@@ -1,9 +0,0 @@
# Advbox

Advbox is a Python toolbox for creating adversarial examples that fool neural networks. It requires Python and PaddlePaddle.

## How to use

1. Train a model and save its parameters (see fluid_mnist.py).
2. Load the parameters trained in step 1, then reconstruct the model (see mnist_tutorial_fgsm.py).
3. Use advbox to generate the adversarial sample, as sketched below.
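A minimal sketch of this workflow, following mnist_tutorial_fgsm.py. The names `logits`, `avg_cost`, and `data` are illustrative and assumed to come from the reconstructed network and its data reader:

```python
import paddle.v2.fluid as fluid

from advbox.models.paddle import PaddleModel
from advbox.attacks.gradientsign import GradientSignAttack

# Step 2: reconstruct the network, then load the parameters saved in step 1.
exe = fluid.Executor(fluid.CPUPlace())
fluid.io.load_params(exe, "./mnist/", main_program=fluid.default_main_program())

# Step 3: wrap the program in a PaddleModel and run the FGSM attack on one
# (image, label) pair; the attack returns the adversarial image if it succeeds.
m = PaddleModel(fluid.default_main_program(), 'img', 'label',
                logits.name, avg_cost.name, (-1, 1))
att = GradientSignAttack(m)
adv_img = att(data)  # data is a one-element [(image, label)] list
```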
@@ -1,16 +0,0 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
A set of tools for generating adversarial examples on the PaddlePaddle platform.
"""
@@ -1,52 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The base class of adversarial attacks.
"""
from abc import ABCMeta, abstractmethod


class Attack(object):
    """
    Abstract base class for adversarial attacks. `Attack` represents an
    adversarial attack that searches for an adversarial example. Subclasses
    should implement the _apply() method.

    Args:
        model(Model): an instance of the class advbox.base.Model.
    """
    __metaclass__ = ABCMeta

    def __init__(self, model):
        self.model = model

    def __call__(self, image_label):
        """
        Generate the adversarial sample.

        Args:
            image_label(list): The image and label tuple list with one element.
        """
        adv_img = self._apply(image_label)
        return adv_img

    @abstractmethod
    def _apply(self, image_label):
        """
        Search for an adversarial example.

        Args:
            image_label(list): The image and label tuple list with one element.
        """
        raise NotImplementedError
@@ -1,87 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module provides the implementation of the FGSM attack method.
"""
from __future__ import division
import numpy as np
from collections import Iterable
from .base import Attack


class GradientSignAttack(Attack):
    """
    This attack was originally implemented by Goodfellow et al. (2015) using
    the infinity norm, and is known as the "Fast Gradient Sign Method" (FGSM).
    Paper link: https://arxiv.org/abs/1412.6572
    """

    def _apply(self, image_label, epsilons=1000):
        assert len(image_label) == 1
        pre_label = np.argmax(self.model.predict(image_label))

        min_, max_ = self.model.bounds()
        gradient = self.model.gradient(image_label)
        gradient_sign = np.sign(gradient) * (max_ - min_)

        if not isinstance(epsilons, Iterable):
            epsilons = np.linspace(0, 1, num=epsilons + 1)

        for epsilon in epsilons:
            adv_img = image_label[0][0].reshape(
                gradient_sign.shape) + epsilon * gradient_sign
            adv_img = np.clip(adv_img, min_, max_)
            adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
            if pre_label != adv_label:
                return adv_img


FGSM = GradientSignAttack


class IteratorGradientSignAttack(Attack):
    """
    This attack was originally implemented by Alexey Kurakin (Google Brain).
    Paper link: https://arxiv.org/pdf/1607.02533.pdf
    """

    def _apply(self, image_label, epsilons=100, steps=10):
        """
        Apply the iterative gradient sign attack.

        Args:
            image_label(list): The image and label tuple list with one element.
            epsilons(list|tuple|int): The epsilon (input variation parameter).
            steps(int): The number of iteration steps.
        Return:
            numpy.ndarray: The adversarial sample generated by the algorithm.
        """
        assert len(image_label) == 1
        pre_label = np.argmax(self.model.predict(image_label))
        gradient = self.model.gradient(image_label)
        min_, max_ = self.model.bounds()

        if not isinstance(epsilons, Iterable):
            epsilons = np.linspace(0, 1, num=epsilons + 1)

        for epsilon in epsilons:
            adv_img = image_label[0][0].reshape(gradient.shape)
            for _ in range(steps):
                gradient = self.model.gradient([(adv_img, image_label[0][1])])
                gradient_sign = np.sign(gradient) * (max_ - min_)
                adv_img = adv_img + epsilon * gradient_sign
                adv_img = np.clip(adv_img, min_, max_)
                adv_label = np.argmax(self.model.predict([(adv_img, 0)]))
                if pre_label != adv_label:
                    return adv_img
@@ -1,16 +0,0 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Paddle models that serve as the target of attacks.
"""
@@ -1,103 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The base class of the attack target model.
"""
from abc import ABCMeta
import abc

abstractmethod = abc.abstractmethod


class Model(object):
    """
    Base class of models to attack.

    Args:
        bounds(tuple): The lower and upper bounds for the image pixels.
        channel_axis(int): The index of the axis that represents the color channel.
        preprocess(tuple): Two-element tuple used to preprocess the input.
            The first element is subtracted from the input, then the result is
            divided by the second element.
    """
    __metaclass__ = ABCMeta

    def __init__(self, bounds, channel_axis, preprocess=None):
        assert len(bounds) == 2
        assert channel_axis in [0, 1, 2, 3]

        if preprocess is None:
            preprocess = (0, 1)
        self._bounds = bounds
        self._channel_axis = channel_axis
        self._preprocess = preprocess

    def bounds(self):
        """
        Return the upper and lower bounds of the model.
        """
        return self._bounds

    def channel_axis(self):
        """
        Return the channel axis of the model.
        """
        return self._channel_axis

    def _process_input(self, input_):
        res = input_
        sub, div = self._preprocess
        if sub != 0:
            res = input_ - sub
        assert div != 0
        if div != 1:
            res /= div
        return res

    @abstractmethod
    def predict(self, image_batch):
        """
        Calculate the prediction of the image batch.

        Args:
            image_batch(numpy.ndarray): image batch of shape (batch_size, height, width, channels).

        Return:
            numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
        """
        raise NotImplementedError

    @abstractmethod
    def num_classes(self):
        """
        Determine the number of classes.

        Return:
            int: the number of classes.
        """
        raise NotImplementedError

    @abstractmethod
    def gradient(self, image_batch):
        """
        Calculate the gradient of the cross-entropy loss w.r.t. the image.

        Args:
            image_batch(list): The image and label tuple list.

        Return:
            numpy.ndarray: gradient of the cross-entropy loss w.r.t. the image,
            with shape (height, width, channel).
        """
        raise NotImplementedError
@@ -1,114 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

import numpy as np
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
from paddle.v2.fluid.framework import program_guard

from .base import Model


class PaddleModel(Model):
    """
    Create a PaddleModel instance.
    When you need to generate an adversarial sample, you should construct an
    instance of PaddleModel.

    Args:
        program(paddle.v2.fluid.framework.Program): The program of the model
            that generates the adversarial sample.
        input_name(string): The name of the input.
        logits_name(string): The name of the logits.
        predict_name(string): The name of the predict.
        cost_name(string): The name of the loss in the program.
    """

    def __init__(self,
                 program,
                 input_name,
                 logits_name,
                 predict_name,
                 cost_name,
                 bounds,
                 channel_axis=3,
                 preprocess=None):
        super(PaddleModel, self).__init__(
            bounds=bounds, channel_axis=channel_axis, preprocess=preprocess)

        if preprocess is None:
            preprocess = (0, 1)

        self._program = program
        self._place = fluid.CPUPlace()
        self._exe = fluid.Executor(self._place)

        self._input_name = input_name
        self._logits_name = logits_name
        self._predict_name = predict_name
        self._cost_name = cost_name

        # gradient of the loss w.r.t. the input variable
        loss = self._program.block(0).var(self._cost_name)
        param_grads = fluid.backward.append_backward(
            loss, parameter_list=[self._input_name])
        self._gradient = dict(param_grads)[self._input_name]

    def predict(self, image_batch):
        """
        Predict the labels of the image_batch.

        Args:
            image_batch(list): The image and label tuple list.
        Return:
            numpy.ndarray: predictions of the images with shape (batch_size, num_of_classes).
        """
        feeder = fluid.DataFeeder(
            feed_list=[self._input_name, self._logits_name],
            place=self._place,
            program=self._program)
        predict_var = self._program.block(0).var(self._predict_name)
        predict, = self._exe.run(self._program,
                                 feed=feeder.feed(image_batch),
                                 fetch_list=[predict_var])
        return predict

    def num_classes(self):
        """
        Calculate the number of classes of the output label.

        Return:
            int: the number of classes.
        """
        predict_var = self._program.block(0).var(self._predict_name)
        assert len(predict_var.shape) == 2
        return predict_var.shape[1]

    def gradient(self, image_batch):
        """
        Calculate the gradient of the loss w.r.t. the input.

        Args:
            image_batch(list): The image and label tuple list.
        Return:
            numpy.ndarray: the gradient of the loss w.r.t. the image.
        """
        feeder = fluid.DataFeeder(
            feed_list=[self._input_name, self._logits_name],
            place=self._place,
            program=self._program)

        grad, = self._exe.run(self._program,
                              feed=feeder.feed(image_batch),
                              fetch_list=[self._gradient])
        return grad
@@ -1,99 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
CNN on MNIST data using the fluid API of PaddlePaddle.
"""
import paddle.v2 as paddle
import paddle.v2.fluid as fluid


def mnist_cnn_model(img):
    """
    MNIST CNN model.

    Args:
        img(Variable): the input image to be recognized

    Returns:
        Variable: the label prediction
    """
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        num_filters=20,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        num_filters=50,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    return logits


def main():
    """
    Train the CNN model on the MNIST dataset.
    """
    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    logits = mnist_cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    optimizer = fluid.optimizer.Adam(learning_rate=0.01)
    optimizer.minimize(avg_cost)

    accuracy = fluid.evaluator.Accuracy(input=logits, label=label)

    BATCH_SIZE = 50
    PASS_NUM = 3
    ACC_THRESHOLD = 0.98
    LOSS_THRESHOLD = 10.0
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
    exe.run(fluid.default_startup_program())

    for pass_id in range(PASS_NUM):
        accuracy.reset(exe)
        for data in train_reader():
            loss, acc = exe.run(fluid.default_main_program(),
                                feed=feeder.feed(data),
                                fetch_list=[avg_cost] + accuracy.metrics)
            pass_acc = accuracy.eval(exe)
            print("pass_id=" + str(pass_id) + " acc=" + str(acc) + " pass_acc="
                  + str(pass_acc))
            if loss < LOSS_THRESHOLD and pass_acc > ACC_THRESHOLD:
                break

        pass_acc = accuracy.eval(exe)
        print("pass_id=" + str(pass_id) + " pass_acc=" + str(pass_acc))
    fluid.io.save_params(
        exe, dirname='./mnist', main_program=fluid.default_main_program())
    print('train mnist done')


if __name__ == '__main__':
    main()
@@ -1,100 +0,0 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
FGSM demos on MNIST using the advbox tool.
"""
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import matplotlib.pyplot as plt
import numpy as np

from advbox.models.paddle import PaddleModel
from advbox.attacks.gradientsign import GradientSignAttack


def cnn_model(img):
    """
    MNIST CNN model.
    Args:
        img(Variable): the input image to be recognized
    Returns:
        Variable: the label prediction
    """
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        num_filters=20,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        num_filters=50,
        filter_size=5,
        pool_size=2,
        pool_stride=2,
        act='relu')

    logits = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    return logits


def main():
    """
    Advbox demo which demonstrates how to use advbox.
    """
    IMG_NAME = 'img'
    LABEL_NAME = 'label'

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow back to the input image
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)

    BATCH_SIZE = 1
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
        batch_size=BATCH_SIZE)
    feeder = fluid.DataFeeder(
        feed_list=[IMG_NAME, LABEL_NAME],
        place=place,
        program=fluid.default_main_program())

    fluid.io.load_params(
        exe, "./mnist/", main_program=fluid.default_main_program())

    # advbox demo
    m = PaddleModel(fluid.default_main_program(), IMG_NAME, LABEL_NAME,
                    logits.name, avg_cost.name, (-1, 1))
    att = GradientSignAttack(m)
    for data in train_reader():
        # fgsm attack
        adv_img = att(data)
        plt.imshow(adv_img[0][0], cmap='Greys_r')
        plt.show()
        # np.save('adv_img', adv_img)
        break


if __name__ == '__main__':
    main()
@@ -0,0 +1,121 @@
## Add Kernels for a New Device

### Background

PaddlePaddle Fluid has hundreds of operators. Each operator could have one or more kernels. A kernel is an implementation of the operator for a certain device, which could be a hardware device, e.g., the CUDA GPU, or a library that utilizes a device, e.g., Intel MKL, which makes full use of the Xeon CPU.

[This document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md) explains how to add an operator and its kernels. The kernels of an operator are indexed by a C++ type, [`OpKernelType`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/operator_kernel_type.md). An operator chooses the right kernel at runtime. This choosing mechanism is described [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/switch_kernel.md).

### Write Kernels for A New Device

#### Add A New Device

For some historical reasons, we misuse the word *library* for *device*. For example, we refer to the device type as the *library type*. An example is the header file [`library_type.h`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/library_type.h#L24). We will correct this ASAP.

To register a new device, we need to add an enum value to `LibraryType`:

```cpp
enum class LibraryType {
  kPlain = 0,
  kMKLDNN = 1,
  kCUDNN = 2,
};
```
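For example, to add a hypothetical FPGA backend, one would append a new enum value. The name and value below are illustrative, not part of the current header:

```cpp
enum class LibraryType {
  kPlain = 0,
  kMKLDNN = 1,
  kCUDNN = 2,
  kFPGA = 3,  // hypothetical new device/library type
};
```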
#### Add A New [Place](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53)

If you have a new kind of device, you first need to add a new kind of [`Place`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/place.h#L53). For example, `CUDAPlace`:

```cpp
struct CUDAPlace {
  CUDAPlace() : CUDAPlace(0) {}
  explicit CUDAPlace(int d) : device(d) {}

  inline int GetDeviceId() const { return device; }
  // needed for variant equality comparison
  inline bool operator==(const CUDAPlace &o) const {
    return device == o.device;
  }
  inline bool operator!=(const CUDAPlace &o) const { return !(*this == o); }

  int device;
};

typedef boost::variant<CUDAPlace, CPUPlace> Place;
```
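Following the same pattern, a place for the hypothetical FPGA device might look like the sketch below; the new struct must also be added to the `Place` variant:

```cpp
struct FPGAPlace {
  FPGAPlace() : FPGAPlace(0) {}
  explicit FPGAPlace(int d) : device(d) {}

  inline int GetDeviceId() const { return device; }
  // needed for variant equality comparison
  inline bool operator==(const FPGAPlace &o) const { return device == o.device; }
  inline bool operator!=(const FPGAPlace &o) const { return !(*this == o); }

  int device;
};

typedef boost::variant<FPGAPlace, CUDAPlace, CPUPlace> Place;
```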
#### Add a [device context](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37)

After a new kind of device is added, you should add a corresponding [DeviceContext](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/platform/device_context.h#L37) for it:

```cpp
class DeviceContext {
 public:
  virtual ~DeviceContext() {}
  virtual Place GetPlace() const = 0;

  virtual void Wait() const {}
};
```
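A corresponding context for the hypothetical FPGA device could then derive from `DeviceContext`. A minimal sketch, assuming the `FPGAPlace` above:

```cpp
class FPGADeviceContext : public DeviceContext {
 public:
  explicit FPGADeviceContext(FPGAPlace place) : place_(place) {}

  Place GetPlace() const override { return place_; }

  // Block until all work submitted to the device has finished.
  void Wait() const override { /* device-specific synchronization */ }

 private:
  FPGAPlace place_;
};
```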
#### Implement a new [OpKernel](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/operator.h#L351) for your device

Detailed documentation can be found in [`new_op_and_kernel`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/dev/new_op_en.md).

```cpp
class OpKernelBase {
 public:
  /**
   * ExecutionContext is the only parameter of the kernel's Run function.
   * Run will get input/output variables, state such as momentum, and
   * device resources such as the CUDA stream and cublas handle from the
   * ExecutionContext. The user should construct it before running the operator.
   */

  virtual void Compute(const ExecutionContext& context) const = 0;

  virtual ~OpKernelBase() = default;
};

template <typename T>
class OpKernel : public OpKernelBase {
 public:
  using ELEMENT_TYPE = T;
};
```
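As a sketch, a device-specific kernel derives from `OpKernel<T>` and implements `Compute`. The operator below is a placeholder, not an existing kernel:

```cpp
template <typename T>
class FPGAReluKernel : public OpKernel<T> {
 public:
  void Compute(const ExecutionContext& context) const override {
    // Fetch the input/output tensors and the FPGADeviceContext from
    // `context`, then launch the device-specific ReLU computation.
  }
};
```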
#### Register the OpKernel to the framework

After writing the components described above, we should register the kernel to the framework.

We use `REGISTER_OP_KERNEL` to do the registration:

```cpp
REGISTER_OP_KERNEL(
    op_type,
    library_type,
    place_type,
    kernel0, kernel1, ...)
```

`kernel0, kernel1, ...` are kernels that have the same `op_type`, `library_type`, and `place_type` but different `data_type`s.

Take [`conv2d`](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/operators/conv_cudnn_op.cu.cc#L318) as an example:

```cpp
REGISTER_OP_KERNEL(conv2d, CPU, paddle::platform::CPUPlace,
    paddle::operators::GemmConvKernel<paddle::platform::CPUDeviceContext, float>,
    paddle::operators::GemmConvKernel<paddle::platform::CPUDeviceContext, double>);

REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
    paddle::operators::CUDNNConvOpKernel<float>,
    paddle::operators::CUDNNConvOpKernel<double>);
```

In the code above:

- `conv2d` is the type/name of the operator
- `CUDNN/CPU` is the `library`
- `paddle::platform::CUDAPlace/CPUPlace` is the `place`
- the template parameter `float/double` on `CUDNNConvOpKernel<T>` is the `data_type`.
@@ -0,0 +1,140 @@
# Fluid Distributed Training

## Introduction

In this article, we'll explain how to configure and run distributed training jobs with PaddlePaddle Fluid in a bare-metal cluster.

## Preparations

### Get your cluster ready

Prepare your computer nodes in the cluster. Nodes in this cluster can be of any specification that runs PaddlePaddle, each with a unique IP address assigned to it. Make sure they can communicate with each other.

### Have PaddlePaddle installed

PaddlePaddle must be installed on all nodes. If you have GPU cards on your nodes, be sure to properly install drivers and CUDA libraries.

The PaddlePaddle build and installation guide can be found [here](http://www.paddlepaddle.org/docs/develop/documentation/en/getstarted/build_and_install/index_en.html).

### Update training script

#### Non-cluster training script

Let's take the first chapter of [Deep Learning 101](http://www.paddlepaddle.org/docs/develop/book/01.fit_a_line/index.html), "fit a line", as an example.

This demo's non-cluster version with the fluid API is as follows:
``` python
import paddle.v2 as paddle
import paddle.v2.fluid as fluid

x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
y = fluid.layers.data(name='y', shape=[1], dtype='float32')

cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(x=cost)

sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
sgd_optimizer.minimize(avg_cost)

BATCH_SIZE = 20

train_reader = paddle.batch(
    paddle.reader.shuffle(
        paddle.dataset.uci_housing.train(), buf_size=500),
    batch_size=BATCH_SIZE)

place = fluid.CPUPlace()
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)

exe.run(fluid.default_startup_program())

PASS_NUM = 100
for pass_id in range(PASS_NUM):
    fluid.io.save_persistables(exe, "./fit_a_line.model/")
    fluid.io.load_persistables(exe, "./fit_a_line.model/")
    for data in train_reader():
        avg_loss_value, = exe.run(fluid.default_main_program(),
                                  feed=feeder.feed(data),
                                  fetch_list=[avg_cost])

        if avg_loss_value[0] < 10.0:
            exit(0)  # if avg cost less than 10.0, we think our code is good.
exit(1)
```
We created a simple fully connected neural network training program and handed it to the fluid executor to run for 100 passes.

Now let's try to convert it to a distributed version to run in a cluster.

#### Introducing the parameter server

As you can see from the non-cluster version of the training script, there is only one role in it: the trainer, which does the computing as well as holding the parameters. In cluster training, since multiple trainers are working on the same task, they need one centralized place to hold and distribute parameters. This centralized place is called the Parameter Server in PaddlePaddle.

![parameter server architecture](src/trainer.png)

The Parameter Server in fluid not only holds parameters but is also assigned a part of the program. Trainers communicate with parameter servers via send/receive OPs. For more technical details, please refer to this [document](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/dist_refactor/distributed_architecture.md).

Now we need to create programs for both trainers and parameter servers. The question is: how?

#### Slice the program

Fluid provides a tool called "Distribute Transpiler" to automatically convert the non-cluster program into a cluster program.

The idea behind this tool is to find the optimize OPs and gradient parameters, slice the program into two pieces, and connect them with send/receive OPs.

The optimize OPs and gradient parameters can be found in the return values of the optimizer's minimize function.

To put them together:
``` python
... # define the program, cost, and create sgd optimizer

optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)  # get optimize OPs and gradient parameters

t = fluid.DistributeTranspiler()  # create the transpiler instance
# slice the program into 2 pieces with optimizer_ops and gradient parameters list,
# as well as pserver_endpoints, which is a comma separated list of [IP:PORT],
# and the number of trainers
t.transpile(optimize_ops, params_grads, pservers=pserver_endpoints, trainers=2)

... # create executor

# in pserver, run this
# current_endpoint here means the current pserver IP:PORT you wish to run on
pserver_prog = t.get_pserver_program(current_endpoint)
pserver_startup = t.get_startup_program(current_endpoint, pserver_prog)
exe.run(pserver_startup)
exe.run(pserver_prog)

# in trainer, run this
... # define data reader
exe.run(fluid.default_startup_program())
for pass_id in range(100):
    for data in train_reader():
        exe.run(t.get_trainer_program())
```
### E2E demo

Please find the complete demo [here](https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/fluid/tests/book_distribute/notest_dist_fit_a_line.py). On the parameter server node, run this in the command line:

``` bash
PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=PSERVER python notest_dist_fit_a_line.py
```

*Please note we assume that your parameter server runs at 192.168.1.2:6174.*

Wait until the prompt `Server listening on 192.168.1.2:6174` appears.

Then run this on 2 of your trainer nodes:

``` bash
PSERVERS=192.168.1.2:6174 SERVER_ENDPOINT=192.168.1.2:6174 TRAINING_ROLE=TRAINER python notest_dist_fit_a_line.py
```

*The reason you need to run this command on 2 nodes is that the script sets the trainer count to 2. You can change this setting on line 50.*

Now you have 2 trainers and 1 parameter server up and running.
@@ -1,5 +0,0 @@
The tutorials in v1_api_tutorials currently use the v1 API, and will be upgraded to the v2 API later.
Thus, v1_api_tutorials is a temporary directory. We have decided not to maintain it and will delete it in the future.

Please go to [PaddlePaddle/book](https://github.com/PaddlePaddle/book) and
[PaddlePaddle/models](https://github.com/PaddlePaddle/models) to learn PaddlePaddle.
@@ -1,137 +0,0 @@
# Generative Adversarial Networks (GAN)

This demo implements GAN training described in the original [GAN paper](https://arxiv.org/abs/1406.2661) and in the deep convolutional generative adversarial networks [DCGAN paper](https://arxiv.org/abs/1511.06434).

The high-level structure of a GAN is shown in Figure 1 below. It is composed of two major parts: a generator and a discriminator, both of which are based on neural networks. The generator takes in some kind of noise with a known distribution and transforms it into an image. The discriminator takes in an image and determines whether it was artificially generated by the generator or is a real image. The generator and the discriminator are thus in a competitive game, in which the generator tries to generate images that look as real as possible to fool the discriminator, while the discriminator tries to distinguish between real and fake images.

<center>![](./gan.png)</center>
<p align="center">
    Figure 1. GAN-Model-Structure
    <a href="https://ishmaelbelghazi.github.io/ALI/">figure credit</a>
</p>

The generator and discriminator take turns being trained using SGD. The objective of the generator is to have its generated images classified as real by the discriminator, and the objective of the discriminator is to correctly classify real and fake images. When the GAN model is trained to converge to the equilibrium state, the generator will transform the given noise distribution into the distribution of real images, and the discriminator will not be able to distinguish between real and fake images at all.
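Formally, this game corresponds to the minimax objective from the original GAN paper, where $G$ is the generator, $D$ is the discriminator, $p_{data}$ is the real-image distribution, and $p_z$ is the noise distribution:

$$\min_G \max_D \; \mathbb{E}_{x \sim p_{data}}[\log D(x)] + \mathbb{E}_{z \sim p_z}[\log(1 - D(G(z)))]$$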
## Implementation of GAN Model Structure

Since the GAN model involves multiple neural networks, it requires the use of the Paddle Python API. The code walk-through below can therefore also partially serve as an introduction to the usage of the Paddle Python API.

There are three networks defined in gan_conf.py, namely **generator_training**, **discriminator_training** and **generator**. The relationship to the model structure defined above is that **discriminator_training** is the discriminator, **generator** is the generator, and **generator_training** combines the generator and the discriminator, since training the generator requires the discriminator to provide the loss function. This relationship is described in the following code:
```python
if is_generator_training:
    noise = data_layer(name="noise", size=noise_dim)
    sample = generator(noise)

if is_discriminator_training:
    sample = data_layer(name="sample", size=sample_dim)

if is_generator_training or is_discriminator_training:
    label = data_layer(name="label", size=1)
    prob = discriminator(sample)
    cost = cross_entropy(input=prob, label=label)
    classification_error_evaluator(
        input=prob, label=label, name=mode + '_error')
    outputs(cost)

if is_generator:
    noise = data_layer(name="noise", size=noise_dim)
    outputs(generator(noise))
```
In order to train the networks defined in gan_conf.py, one first needs to initialize a Paddle environment, parse the config, create a GradientMachine from the config, and create a trainer from the GradientMachine, as done in the code chunk below:
```python
import py_paddle.swig_paddle as api
# init paddle environment
api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10',
               '--log_period=100', '--gpu_id=' + args.gpu_id,
               '--save_dir=' + "./%s_params/" % data_source)

# Parse config
gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
generator_conf = parse_config(conf, "mode=generator,data=" + data_source)

# Create GradientMachine
dis_training_machine = api.GradientMachine.createFromConfigProto(
    dis_conf.model_config)
gen_training_machine = api.GradientMachine.createFromConfigProto(
    gen_conf.model_config)
generator_machine = api.GradientMachine.createFromConfigProto(
    generator_conf.model_config)

# Create trainer
dis_trainer = api.Trainer.create(dis_conf, dis_training_machine)
gen_trainer = api.Trainer.create(gen_conf, gen_training_machine)
```
In order to balance the strength of the generator and the discriminator, we train whichever one is performing worse, determined by comparing their loss function values. The loss function value can be calculated by a forward pass through the GradientMachine:
```python
def get_training_loss(training_machine, inputs):
    outputs = api.Arguments.createArguments(0)
    training_machine.forward(inputs, outputs, api.PASS_TEST)
    loss = outputs.getSlotValue(0).copyToNumpyMat()
    return numpy.mean(loss)
```

After training one network, one needs to sync the new parameters to the other networks. The code below demonstrates one example of such a use case:
```python
# Train the gen_training
gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)

# Copy the parameters from gen_training to dis_training and generator
copy_shared_parameters(gen_training_machine,
                       dis_training_machine)
copy_shared_parameters(gen_training_machine, generator_machine)
```
## A Toy Example
With the infrastructure explained above, we can now walk you through a toy example of generating a two-dimensional uniform distribution from 10-dimensional Gaussian noise.

The Gaussian noise is generated using the code below:
```python
def get_noise(batch_size, noise_dim):
    return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
```

The real samples (2-D uniform) are generated using the code below:
```python
# synthesize 2-D uniform data in gan_trainer.py:114
def load_uniform_data():
    data = numpy.random.rand(1000000, 2).astype('float32')
    return data
```

The generator and discriminator networks are built using fully-connected layers and batch_norm layers, and are defined in gan_conf.py.

To train the GAN model, one can use the command below. The flag -d specifies the training data (cifar, mnist or uniform) and the flag --useGpu specifies whether to use a GPU for training (0 is CPU, 1 is GPU).
```bash
$ python gan_trainer.py -d uniform --useGpu 1
```
The generated samples can be found in ./uniform_samples/ and one example is shown below as Figure 2. One can see that it roughly recovers the 2-D uniform distribution.

<center>![](./uniform_sample.png)</center>
<p align="center">
    Figure 2. Uniform Sample
</p>

## MNIST Example
### Data preparation
To download the MNIST data, one can use the following commands:
```bash
$ cd data/
$ ./get_mnist_data.sh
```

### Model description
Following the [DCGAN paper](https://arxiv.org/abs/1511.06434), we use convolution/convolution-transpose layers in the discriminator/generator networks to better deal with images. The details of the network structures are defined in gan_conf_image.py.

### Training the model
To train the GAN model on MNIST data, one can use the following command:
```bash
$ python gan_trainer.py -d mnist --useGpu 1
```
The generated sample images can be found at ./mnist_samples/ and one example is shown below as Figure 3.
<center>![](./mnist_sample.png)</center>
<p align="center">
    Figure 3. MNIST Sample
</p>