commit d264585131
@ -0,0 +1,79 @@
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle.trainer_config_helpers import *

is_predict = get_config_arg("is_predict", bool, False)

#################### Data Configuration ####################

if not is_predict:
    data_dir = './data/'
    define_py_data_sources2(
        train_list=data_dir + 'train.list',
        test_list=data_dir + 'test.list',
        module='mnist_provider',
        obj='process')

#################### Algorithm Configuration ####################

settings(batch_size=50, learning_rate=0.001, learning_method=AdamOptimizer())

#################### Network Configuration ####################

data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)


# light cnn
# A shallower CNN model: [CNN, BN, ReLU, Max-Pooling] x4 + FC x1.
# Easier to train on the MNIST dataset and quite efficient.
# Final performance is close to that of deeper models on tasks such as
# digit and character classification.
def light_cnn(input_image, num_channels, num_classes):
    def __light__(ipt,
                  num_filter=128,
                  times=1,
                  conv_filter_size=3,
                  dropouts=0,
                  num_channels_=None):
        return img_conv_group(
            input=ipt,
            num_channels=num_channels_,
            pool_size=2,
            pool_stride=2,
            conv_padding=0,
            conv_num_filter=[num_filter] * times,
            conv_filter_size=conv_filter_size,
            conv_act=ReluActivation(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=MaxPooling())

    tmp = __light__(input_image, num_filter=128, num_channels_=num_channels)
    tmp = __light__(tmp, num_filter=128)
    tmp = __light__(tmp, num_filter=128)
    tmp = __light__(tmp, num_filter=128, conv_filter_size=1)

    tmp = fc_layer(input=tmp, size=num_classes, act=SoftmaxActivation())
    return tmp


predict = light_cnn(input_image=img, num_channels=1, num_classes=label_size)

if not is_predict:
    lbl = data_layer(name="label", size=label_size)
    inputs(img, lbl)
    outputs(classification_cost(input=predict, label=lbl))
else:
    outputs(predict)

@ -0,0 +1,91 @@
# Design Doc: Master Server

For an overview of the master server's role, please refer to the [distributed training design doc](./README.md). In this design doc we will discuss the master server in more detail. The master server will be implemented in [Go](https://golang.org/).

## Dataset

<img src="src/dataset.png"/>

A dataset is a list of files in *RecordIO* format. A RecordIO file consists of chunks, and each chunk consists of some records.

## Task Queue

As mentioned in the [distributed training design doc](./README.md), a *task* is a data shard that the master server assigns to a trainer process to train on. A task consists of one or multiple *blocks* from one or multiple files. The master server maintains *task queues* to track the training progress.

### Task Queue Creation

1. Each trainer will make an RPC call (using Go's [rpc](https://golang.org/pkg/net/rpc/) package) to the master server, telling it the RecordIO files representing the dataset specified by the user. Since every trainer will tell the master server the same dataset, only the first RPC call will be honored.

   The RPC interface is:

   ```go
   func (m *RPCServer) ReportDataset(Paths []string, dummy *int) error {
   }
   ```

1. The master server will scan through each RecordIO file to generate the *block index* and learn how many blocks each file has. A block can be referenced by the file path and the index of the block within the file. The block index is an in-memory data structure that enables fast access to each block; the index of a block within its file is an integer starting from 0, identifying the n-th block in the file.

   The definition of the block is:

   ```go
   type Block struct {
       Idx   int // index of the block within the file
       Path  string
       Index recordio.Index // block index
   }
   ```

1. Blocks are grouped into tasks, and tasks are filled into the todo queue. The pending queue and the done queue are initialized as empty.

   The definition of the task is:

   ```go
   type Task struct {
       Index  int
       Blocks []Block
   }
   ```

   The elements in the task queues are of type `TaskEntry`, each containing a timeout counter (described in [task retry logic](#task-retry-logic)) and a task:

   ```go
   type TaskEntry struct {
       NumTimeout int
       Task       Task
   }
   ```

   The definition of the task queues is:

   ```go
   type TaskQueues struct {
       Todo    []TaskEntry
       Pending map[int]TaskEntry // map from task index to task entry
       Done    []TaskEntry
   }
   ```

### Task Queue Persistence

The task queues need to be persisted on [etcd](https://github.com/coreos/etcd) for fault recovery. The task queues only change when a task is completed or timed out, which is not very frequent, so we can afford to synchronize with etcd every time the task queues change.

We will serialize the task queues data structure with [gob encoding](https://golang.org/pkg/encoding/gob/), compress it with gzip, and save it into etcd synchronously under the key `/task_queues`.

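A minimal sketch of this persistence step, assuming the etcd `clientv3` API; the function name `snapshot` is illustrative rather than part of this design:

```go
package master

import (
    "bytes"
    "compress/gzip"
    "context"
    "encoding/gob"

    "github.com/coreos/etcd/clientv3"
)

// snapshot gob-encodes the task queues, gzip-compresses the bytes, and
// saves the result synchronously under the key /task_queues.
func snapshot(cli *clientv3.Client, queues TaskQueues) error {
    var buf bytes.Buffer
    gw := gzip.NewWriter(&buf)
    if err := gob.NewEncoder(gw).Encode(queues); err != nil {
        return err
    }
    if err := gw.Close(); err != nil { // flush the gzip stream
        return err
    }
    _, err := cli.Put(context.Background(), "/task_queues", buf.String())
    return err
}
```

Since gob only encodes exported fields, the `TaskQueues` fields shown above serialize as-is.
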
### Task Dispatch

The trainer will make an RPC call to the master server to get a new task when:

- the trainer first starts, or
- the trainer finishes a task.

The RPC interface is:

```go
func (m *RPCServer) GetTask(finished *Task, result *Task) error {
}
```

Argument `finished` will be `nil` when the trainer has just started.

During the RPC call the master will do the following:

- Make a copy of the task queues, and update the copy to reflect the finished task and the new pending task.
- Synchronize the copy of the task queues with etcd, using a transaction conditioned on holding the master lock.
- If the synchronization succeeded, replace the task queues with the copy and send the new task to the trainer; if it failed, discard the copy and report the error to the trainer.

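The following sketch shows the queue manipulation in this flow; the helper `dispatch` and its exact shape are assumptions for illustration, with the etcd synchronization left to the caller:

```go
package master

import "errors"

// dispatch applies the updates described above to a copy of the task
// queues: it records the finished task, moves the next todo task to
// pending, and returns the updated copy with the dispatched task. The
// caller syncs the copy to etcd and commits it only on success.
func dispatch(cur TaskQueues, finished *Task) (TaskQueues, Task, error) {
    // Copy so that a failed etcd sync leaves the current state untouched.
    q := TaskQueues{
        Todo:    append([]TaskEntry(nil), cur.Todo...),
        Pending: make(map[int]TaskEntry, len(cur.Pending)),
        Done:    append([]TaskEntry(nil), cur.Done...),
    }
    for k, v := range cur.Pending {
        q.Pending[k] = v
    }

    if finished != nil {
        // Move the finished task from the pending queue to the done queue.
        if entry, ok := q.Pending[finished.Index]; ok {
            delete(q.Pending, finished.Index)
            q.Done = append(q.Done, entry)
        }
    }
    if len(q.Todo) == 0 {
        return q, Task{}, errors.New("no task left in the todo queue")
    }

    // Move the next task from the todo queue to the pending queue.
    next := q.Todo[0]
    q.Todo = q.Todo[1:]
    q.Pending[next.Task.Index] = next
    return q, next.Task, nil
}
```
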
### Task Retry Logic

When a task is dispatched to a trainer, the master will schedule a function for execution after the timeout duration (based on the moving average of the task completion time). If the task entry is still in the pending queue at that point, its timeout counter will be increased by one and the task will be moved back to the todo queue. If the timeout counter is above the threshold, the master will log the error and discard the task.

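A minimal sketch of the retry timer using `time.AfterFunc`, assuming the task queues shown earlier are guarded by a mutex; `maxTimeout` and the `master` struct fields are illustrative assumptions:

```go
package master

import (
    "log"
    "sync"
    "time"
)

const maxTimeout = 3 // illustrative threshold

type master struct {
    mu     sync.Mutex
    queues TaskQueues
}

// scheduleTimeout runs after the timeout duration. If the task is still
// pending, it increments the timeout counter and moves the task back to
// the todo queue, discarding the task once the counter passes the threshold.
func (m *master) scheduleTimeout(taskIdx int, timeout time.Duration) {
    time.AfterFunc(timeout, func() {
        m.mu.Lock()
        defer m.mu.Unlock()

        entry, ok := m.queues.Pending[taskIdx]
        if !ok {
            return // the task completed before the timeout fired
        }
        delete(m.queues.Pending, taskIdx)

        entry.NumTimeout++
        if entry.NumTimeout > maxTimeout {
            log.Printf("task %d timed out %d times, discarding", taskIdx, entry.NumTimeout)
            return
        }
        // Put the task back so another trainer can pick it up.
        m.queues.Todo = append(m.queues.Todo, entry)
    })
}
```
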
Please note that a timed-out task could still be completed after it has been dispatched for retry, so it is possible for a task to be processed multiple times. We do not try to prevent this, since it is fine to train on the same task multiple times due to the stochastic nature of the stochastic gradient descent algorithm.

@ -0,0 +1,157 @@
# Design Doc: The Client Library of Parameter Server

For an overview of the trainer's role, please refer to the [distributed training design doc](README.md). In this design doc, we will discuss the parameter server's client library, which will manage communication with parameter servers. The library will be implemented in [Go](https://golang.org/) and made available as a static or dynamic library with a C header file.

## Parameter Partition

Each parameter will be partitioned into parameter blocks to make the parameters evenly distributed on parameter servers. The partition is done automatically by the client library. *Sparse parameters* require slightly different treatment, described in the next subsection.

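For the dense case, one possible partitioning scheme is sketched below; that a parameter is a plain `float32` slice and that each parameter server receives one roughly equal block are assumptions of this sketch, not part of the design:

```go
package pserverclient

// partition splits a dense parameter into numPServers roughly equal
// blocks; block i would be sent to parameter server i.
func partition(param []float32, numPServers int) [][]float32 {
    blockSize := (len(param) + numPServers - 1) / numPServers
    var blocks [][]float32
    for begin := 0; begin < len(param); begin += blockSize {
        end := begin + blockSize
        if end > len(param) {
            end = len(param)
        }
        blocks = append(blocks, param[begin:end])
    }
    return blocks
}
```
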
### Sparse Parameter

A sparse parameter is a parameter that is updated sparsely. The name is somewhat misleading: it does not have a sparse representation; it has the same representation as a dense vector.

Because a sparse parameter is updated sparsely, the trainer has to partition it. And because the parameter servers will merge all shards of a sparse parameter into the same file when saving the parameter, a special naming convention is needed.

If a sparse parameter is partitioned into n shards, they should be named as:

```text
name:sparse-0
name:sparse-1
...
name:sparse-n-1
```

The library is unaware of the partition, and treats each parameter shard independently. Only when saving parameters will the parameter servers merge the sparse parameter shards according to the naming convention.

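A small sketch of this naming convention in Go; only the `name:sparse-i` format comes from this design, the helper itself is illustrative:

```go
package pserverclient

import "fmt"

// sparseShardNames returns the names of the n shards of a sparse
// parameter, following the name:sparse-i convention described above.
func sparseShardNames(name string, n int) []string {
    names := make([]string, n)
    for i := range names {
        names[i] = fmt.Sprintf("%s:sparse-%d", name, i)
    }
    return names
}
```

For example, `sparseShardNames("embedding", 3)` returns `embedding:sparse-0`, `embedding:sparse-1`, and `embedding:sparse-2`.
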
## Model Optimization Using Gradients

There are two ways to perform model optimization using gradients:

- On Client

  The client does multiple steps of forward and backward update. In each step, the gradients are calculated and a new model is generated. After some steps, the client will calculate the difference between the newest model and the old model at step 0. The difference will be sent to the parameter servers. The parameter servers will just update the parameters with the difference, without doing any gradient-based optimization (such as Adam or L1 regularization). A sketch of the client-side computation follows this list.

- On Parameter Server

  The client will send accumulated gradients to the parameter servers, and the parameter servers will do the optimization using these gradients.

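A minimal sketch of the client-side computation in the "On Client" mode, assuming a model parameter is a plain `float32` slice; the helper name `modelDiff` is illustrative:

```go
package pserverclient

// modelDiff returns newModel - oldModel, the accumulated change after
// several local forward/backward steps. In the "On Client" mode this
// difference, not the raw gradients, is sent to the parameter servers.
func modelDiff(oldModel, newModel []float32) []float32 {
    diff := make([]float32, len(newModel))
    for i := range newModel {
        diff[i] = newModel[i] - oldModel[i]
    }
    return diff
}
```
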
## L1 and L2 Regularization

PaddlePaddle allows L1 or L2 regularization to be specified per parameter, so when the trainer initializes a parameter it needs to include the parameter configuration whenever L1 or L2 regularization is necessary.

## Parameter Initialization

The parameters on the parameter servers need to be initialized. To provide maximum flexibility, the trainer will initialize the parameters. Only one trainer will do the initialization; the other trainers will wait for the completion of initialization and get the parameters from the parameter servers.

### Trainer Selection

To select the trainer for initialization, every trainer will try to acquire a distributed lock; whoever owns the lock will do the initialization. As illustrated below:

<img src="./src/init_lock.png">

### Trainer Selection Process

The trainer selection process is encapsulated in the C API function:

```c
int paddle_begin_init_params(paddle_pserver_client* client, const char* pserver_config_proto);
```

The selected trainer's call to `paddle_begin_init_params` will return 1, while the other trainers' calls will block until initialization is done and then return 0. As illustrated below:

<img src="./src/pserver_init.png">

## C Interface

```c
typedef enum {
  PADDLE_ELEMENT_TYPE_INT32   = 0,
  PADDLE_ELEMENT_TYPE_UINT32  = 1,
  PADDLE_ELEMENT_TYPE_INT64   = 2,
  PADDLE_ELEMENT_TYPE_UINT64  = 3,
  PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
  PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
} paddle_element_type;

typedef struct {
  char*               name;
  paddle_element_type element_type;
  void*               content;
  int                 content_len;
} paddle_parameter, paddle_gradient;

typedef struct paddle_pserver_client paddle_pserver_client;

paddle_pserver_client* paddle_new_pserver_client();
void paddle_pserver_client_release(paddle_pserver_client* client);

/**
 * @brief paddle_begin_init_params begins to initialize parameters on
 * parameter servers.
 *
 * paddle_begin_init_params will be called from multiple trainers, but
 * only one trainer will be selected to initialize the parameters on
 * parameter servers. The other trainers will be blocked until the
 * initialization is done, and they need to get the initialized
 * parameters from parameter servers using @paddle_get_params.
 *
 * @param pserver_config_proto serialized parameter server configuration in
 * Protocol Buffers format.
 * @return 1 if the trainer is selected to initialize parameter
 * servers, otherwise 0.
 */
int paddle_begin_init_params(paddle_pserver_client* client, const char* pserver_config_proto);

/**
 * @brief paddle_init_param initializes the parameter on parameter
 * servers.
 *
 * @param param the parameter to initialize.
 * @param param_config_proto the configuration for the parameter.
 * @return 0 if successful, otherwise -1. On failure, the trainer
 * needs to restart the entire initialization process (starting from
 * @paddle_begin_init_params), or simply exit the program and wait for
 * the cluster management system to restart the trainer.
 */
int paddle_init_param(paddle_pserver_client* client, paddle_parameter param, const char* param_config_proto);

/**
 * @brief paddle_finish_init_params tells parameter servers that the
 * client has sent all parameters to parameter servers as initialization.
 *
 * @return 0 if successful, otherwise -1. On failure, the trainer
 * needs to restart the entire initialization process (starting from
 * @paddle_begin_init_params), or simply exit the program and wait for
 * the cluster management system to restart the trainer.
 */
int paddle_finish_init_params(paddle_pserver_client* client);

/**
 * @brief paddle_send_grads sends gradients to parameter servers for
 * updating parameters.
 *
 * @param grads the array of gradients to send.
 * @param len the length of the gradient array.
 * @return 0 if successful, otherwise -1.
 */
int paddle_send_grads(paddle_pserver_client* client, const paddle_gradient* grads, int len);

/**
 * @brief paddle_get_params gets parameters from parameter servers.
 *
 * @param names the array of names of the parameters to get.
 * @param dst the destination array of parameters to save to.
 * @param len the length of the names array and the paddle_parameter
 * array.
 * @return 0 if successful, otherwise -1.
 */
int paddle_get_params(paddle_pserver_client* client, const char** names, paddle_parameter* dst, int len);

/**
 * @brief paddle_save_model asks parameter servers to save the
 * parameters to the given path.
 *
 * @param path the path to save parameters to.
 * @return 0 if successful, otherwise -1.
 */
int paddle_save_model(paddle_pserver_client* client, const char* path);
```

Binary files not shown: five new images (11 KiB, 42 KiB, 26 KiB, 28 KiB, 52 KiB).
@ -0,0 +1,127 @@
# Submit a Distributed Training Job

The user can submit a distributed training job with Python code, rather than with a command-line interface.

## Runtime Environment On Kubernetes

For a distributed training job, there are two Docker images: the *runtime Docker image* and the *base Docker image*. The runtime Docker image is the image that gets scheduled by Kubernetes to run during training. The base Docker image is used for building the runtime Docker image.

### Base Docker Image

Usually, the base Docker image is the PaddlePaddle production Docker image, which includes the Paddle binaries and the Python package. And of course, users can specify any image name hosted on any Docker registry to which they have access.

### Runtime Docker Image

The trainer package that the user uploads, together with some Python dependencies, is packaged into a runtime Docker image based on the base Docker image.

- Handle Python Dependencies

  You need to provide a `requirements.txt` file in your trainer package folder. Example:

  ```txt
  pillow
  protobuf==3.1.0
  ```

  More [details](https://pip.readthedocs.io/en/1.1/requirements.html) about requirements files can be found in the pip documentation. An example project looks like:

  ```bash
  paddle_example
    |-quick_start
      |-trainer.py
      |-dataset.py
      |-requirements.txt
  ```

## Submit Distributed Training Job With Python Code

<img src="./src/submit-job.png" width="800">

- `paddle.job.dist_train()` will call the Job Server API `/v1/package` to upload the trainer package and save it on CephFS, and then call `/v1/trainer/job` to submit the PaddlePaddle distributed job.
- `/v1/trainer/job` will start a building job to prepare the runtime Docker image. When the building job is finished, the Job Server will submit the PaddlePaddle distributed job to Kubernetes.
- *NOTE*: For the first version, we will not prepare the runtime Docker image; instead, the package is uploaded to Paddle Cloud, and Paddle Cloud will mount the package in a temporary folder into the base Docker image. We will not support custom Python dependencies in the first version either.

You can call `paddle.job.dist_train` and provide the distributed training configuration as the parameters:

```python
paddle.job.dist_train(
    trainer=dist_trainer(),
    paddle_job=PaddleJob(
        job_name="paddle-cloud",
        entry_point="python %s" % __file__,
        trainer_package="/example/word2vec",
        image="yancey1989/paddle-job",
        trainers=10,
        pservers=3,
        trainer_cpu=1,
        trainer_gpu=1,
        trainer_mem="10G",
        pserver_cpu=1,
        pserver_mem="2G"))
```

The parameter `trainer` of `paddle.job.dist_train` is a function, and you can implement it as follows:

```python
def dist_trainer():
    def trainer_creator():
        trainer = paddle.v2.trainer.SGD(...)
        trainer.train(...)

    return trainer_creator
```

The pseudo code of `paddle.job.dist_train` is as follows:

```python
def dist_train(trainer, paddle_job):
    # when running on cloud, the cluster sets RUNNING_ON_CLOUD=YES
    if os.getenv("RUNNING_ON_CLOUD", "NO") == "NO":
        # submit the paddle job
        paddle_job.submit()
    else:
        # start the training
        trainer()
```

### PaddleJob Parameters

parameter | type | explanation
--- | --- | ---
job_name | str | the unique name for the training job
entry_point | str | entry point for starting the trainer process
trainer_package | str | trainer package file path to which the user has access
image | str | the [base image](#base-docker-image) for building the [runtime image](#runtime-docker-image)
pservers | int | Parameter Server process count
trainers | int | Trainer process count
pserver_cpu | int | CPU count for each Parameter Server process
pserver_mem | str | memory allocated for each Parameter Server process, a plain integer with one of these suffixes: E, P, T, G, M, K
trainer_cpu | int | CPU count for each Trainer process
trainer_mem | str | memory allocated for each Trainer process, a plain integer with one of these suffixes: E, P, T, G, M, K
trainer_gpu | int | GPU count for each Trainer process; if you only want CPU, do not set this parameter

### Deploy Parameter Server, Trainer and Master Process

- Deploy the PaddlePaddle Parameter Server processes as a Kubernetes ReplicaSet.
- Deploy the PaddlePaddle Trainer processes as a Kubernetes Job.
- Deploy the PaddlePaddle Master processes as a Kubernetes ReplicaSet.

## Job Server

- RESTful API

  The job server provides a RESTful HTTP API for receiving the trainer package and reporting PaddlePaddle job related information:
  - `POST /v1/package` receives the trainer package and saves it on CephFS
  - `POST /v1/trainer/job` submits a trainer job
  - `GET /v1/jobs/` lists all jobs
  - `GET /v1/jobs/<job-name>` gets the status of a job
  - `DELETE /v1/jobs/<job-name>` deletes a job
  - `GET /v1/version` reports the job server version

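  The job server's implementation language is not specified here; assuming it were written in Go like the other cluster components, the routing could look like the sketch below, with handler bodies elided:

  ```go
  package main

  import (
      "fmt"
      "log"
      "net/http"
  )

  func main() {
      mux := http.NewServeMux()
      // POST /v1/package: receive the trainer package and save it on CephFS.
      mux.HandleFunc("/v1/package", func(w http.ResponseWriter, r *http.Request) {})
      // POST /v1/trainer/job: submit a trainer job.
      mux.HandleFunc("/v1/trainer/job", func(w http.ResponseWriter, r *http.Request) {})
      // GET /v1/jobs/ lists all jobs; GET or DELETE /v1/jobs/<job-name>
      // shows or deletes one job (the name suffix is parsed in the handler).
      mux.HandleFunc("/v1/jobs/", func(w http.ResponseWriter, r *http.Request) {})
      // GET /v1/version: report the job server version.
      mux.HandleFunc("/v1/version", func(w http.ResponseWriter, r *http.Request) {
          fmt.Fprintln(w, "dev")
      })
      log.Fatal(http.ListenAndServe(":8080", mux))
  }
  ```
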
- Build Runtime Docker Image on Kubernetes

  `paddle.job.dist_train` will upload the trainer package to the Job Server, which saves it on the distributed filesystem and then starts a job for building the runtime Docker image, the image that gets scheduled by Kubernetes to run during training.

  There are several benefits to building the runtime Docker image on the job server:
  - On Paddle Cloud, users run trainer code in a Jupyter Notebook, which is a Kubernetes Pod. If we wanted to execute `docker build` inside that Pod, we would have to mount the host's `docker.sock` into the Pod, and the user's code could then connect to the host's Docker Engine directly, which is not safe.
  - Users only need to upload the trainer package files; they do not need to install a Docker engine or a Docker registry as dependencies.
  - If we switch to another image type, such as rkt, users do not need to care about it.

- Deploy Parameter Server, Trainer and Master Processes

  `POST /v1/trainer/job` receives the distributed training parameters and deploys the job as follows:
  - Deploy the PaddlePaddle Parameter Server processes as a Kubernetes ReplicaSet.
  - Deploy the PaddlePaddle Trainer processes as a Kubernetes Job.
  - Deploy the PaddlePaddle Master processes as a Kubernetes ReplicaSet.

@ -0,0 +1,52 @@
import paddle.v2 as paddle
import numpy as np

# init paddle
paddle.init(use_gpu=False)

# network config
x = paddle.layer.data(name='x', type=paddle.data_type.dense_vector(2))
y_predict = paddle.layer.fc(input=x, size=1, act=paddle.activation.Linear())
y = paddle.layer.data(name='y', type=paddle.data_type.dense_vector(1))
cost = paddle.layer.mse_cost(input=y_predict, label=y)

# create parameters
parameters = paddle.parameters.create(cost)
# create optimizer
optimizer = paddle.optimizer.Momentum(momentum=0)
# create trainer
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)


# event_handler to print training info
def event_handler(event):
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 1 == 0:
            print "Pass %d, Batch %d, Cost %f" % (event.pass_id,
                                                  event.batch_id, event.cost)


# define training dataset reader
def train_reader():
    train_x = np.array([[1, 1], [1, 2], [3, 4], [5, 2]])
    train_y = np.array([-2, -3, -7, -7])

    def reader():
        for i in xrange(train_y.shape[0]):
            yield train_x[i], train_y[i]

    return reader


# define feeding map
feeding = {'x': 0, 'y': 1}

# training
trainer.train(
    reader=paddle.batch(
        train_reader(), batch_size=1),
    feeding=feeding,
    event_handler=event_handler,
    num_passes=100)
