commit
437c98d502
File diff suppressed because it is too large
Load Diff
Before Width: | Height: | Size: 232 KiB After Width: | Height: | Size: 116 KiB |
Before Width: | Height: | Size: 244 KiB After Width: | Height: | Size: 236 KiB |
@ -1,43 +0,0 @@
|
|||||||
apiVersion: batch/v1
|
|
||||||
kind: Job
|
|
||||||
metadata:
|
|
||||||
name: paddle-cluster-job
|
|
||||||
spec:
|
|
||||||
parallelism: 3
|
|
||||||
completions: 3
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
name: paddle-cluster-job
|
|
||||||
spec:
|
|
||||||
volumes:
|
|
||||||
- name: jobpath
|
|
||||||
hostPath:
|
|
||||||
path: /home/work/paddle_output
|
|
||||||
containers:
|
|
||||||
- name: trainer
|
|
||||||
image: registry.baidu.com/public/paddle:mypaddle
|
|
||||||
# Exec-form command: use the absolute interpreter path so it resolves regardless of the container's working directory.
command: ["/bin/bash", "-c", "/root/start.sh"]
|
|
||||||
env:
|
|
||||||
- name: JOB_NAME
|
|
||||||
value: paddle-cluster-job
|
|
||||||
- name: JOB_PATH
|
|
||||||
value: /home/jobpath
|
|
||||||
- name: JOB_NAMESPACE
|
|
||||||
value: default
|
|
||||||
- name: TRAIN_CONFIG_DIR
|
|
||||||
value: recommendation
|
|
||||||
- name: CONF_PADDLE_NIC
|
|
||||||
value: eth0
|
|
||||||
- name: CONF_PADDLE_PORT
|
|
||||||
value: "7164"
|
|
||||||
- name: CONF_PADDLE_PORTS_NUM
|
|
||||||
value: "2"
|
|
||||||
- name: CONF_PADDLE_PORTS_NUM_SPARSE
|
|
||||||
value: "2"
|
|
||||||
- name: CONF_PADDLE_GRADIENT_NUM
|
|
||||||
value: "3"
|
|
||||||
volumeMounts:
|
|
||||||
- name: jobpath
|
|
||||||
mountPath: /home/jobpath
|
|
||||||
restartPolicy: Never
|
|
||||||
|
|
@ -0,0 +1,7 @@
|
|||||||
|
FROM alpine
|
||||||
|
|
||||||
|
RUN apk update && apk upgrade && apk add coreutils
|
||||||
|
ADD quick_start /quick_start
|
||||||
|
ADD get_data.sh /bin/
|
||||||
|
RUN chmod +x /bin/get_data.sh
|
||||||
|
ENTRYPOINT ["/bin/get_data.sh"]
|
@ -0,0 +1,6 @@
|
|||||||
|
To build the PaddlePaddle data preparation image used in the tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run the following commands:
|
||||||
|
|
||||||
|
```
|
||||||
|
cp -r ../../../../../../demo/quick_start .
|
||||||
|
docker build . -t prepare-data-image-name
|
||||||
|
```
|
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
out_dir=$OUT_DIR
|
||||||
|
split_count=$SPLIT_COUNT
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
mkdir -p $out_dir
|
||||||
|
cp -r /quick_start $out_dir/
|
||||||
|
|
||||||
|
mkdir -p $out_dir/0/data
|
||||||
|
cd $out_dir/0/data
|
||||||
|
wget http://paddlepaddle.bj.bcebos.com/demo/quick_start_preprocessed_data/preprocessed_data.tar.gz
|
||||||
|
tar zxvf preprocessed_data.tar.gz
|
||||||
|
rm preprocessed_data.tar.gz
|
||||||
|
|
||||||
|
split -d --number=l/$split_count -a 5 train.txt train.
|
||||||
|
mv train.00000 train.txt
|
||||||
|
|
||||||
|
cd $out_dir
|
||||||
|
end=$(expr $split_count - 1)
|
||||||
|
for i in $(seq 1 $end); do
|
||||||
|
mkdir -p $i/data
|
||||||
|
cp -r 0/data/* $i/data
|
||||||
|
mv $i/data/train.`printf %05d $i` $i/data/train.txt
|
||||||
|
done;
|
@ -0,0 +1,6 @@
|
|||||||
|
FROM paddledev/paddle:cpu-latest
|
||||||
|
|
||||||
|
COPY start.sh /root/
|
||||||
|
COPY start_paddle.py /root/
|
||||||
|
RUN chmod +x /root/start.sh
|
||||||
|
# Exec form passes each element to bash verbatim (no shell word-splitting), so the flag must be exactly "-c" with no stray whitespace.
CMD ["bash", "-c", "/root/start.sh"]
|
@ -0,0 +1,5 @@
|
|||||||
|
To build the PaddlePaddle training image used in the tutorial [Distributed PaddlePaddle Training on AWS with Kubernetes](../../k8s_aws_en.md), run the following command:
|
||||||
|
|
||||||
|
```
|
||||||
|
docker build . -t train-image-name
|
||||||
|
```
|
@ -1,19 +1,19 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
set -eu
|
set -eu
|
||||||
|
|
||||||
jobconfig=${JOB_PATH}"/"${JOB_NAME}"/"${TRAIN_CONFIG_DIR}
|
jobconfig=${JOB_PATH}"/"${JOB_NAME}"/"${TRAIN_CONFIG_DIR}
|
||||||
cd /root
|
cd /root
|
||||||
cp -rf $jobconfig .
|
cp -rf $jobconfig/* .
|
||||||
cd $TRAIN_CONFIG_DIR
|
|
||||||
|
|
||||||
|
|
||||||
python /root/start_paddle.py \
|
python /root/start_paddle.py \
|
||||||
--dot_period=10 \
|
--dot_period=10 \
|
||||||
--ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM \
|
--ports_num=$CONF_PADDLE_PORTS_NUM \
|
||||||
|
--ports_num_for_sparse=$CONF_PADDLE_PORTS_NUM_SPARSE \
|
||||||
--log_period=50 \
|
--log_period=50 \
|
||||||
--num_passes=10 \
|
--num_passes=10 \
|
||||||
--trainer_count=4 \
|
--trainer_count=$TRAINER_COUNT \
|
||||||
--saving_period=1 \
|
--saving_period=1 \
|
||||||
--local=0 \
|
--local=0 \
|
||||||
--config=./trainer_config.py \
|
--config=trainer_config.lr.py \
|
||||||
--use_gpu=0
|
--use_gpu=0
|
After Width: | Height: | Size: 87 KiB |
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,102 @@
|
|||||||
|
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Function.h"
|
||||||
|
#include "paddle/math/Matrix.h"
|
||||||
|
#include "paddle/math/SparseMatrix.h"
|
||||||
|
|
||||||
|
namespace paddle {
|
||||||
|
/// CPU, dense matrix (+)= dense matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(CpuMatrix& out,
|
||||||
|
const CpuMatrix& a,
|
||||||
|
const CpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// CPU, dense matrix (+)= sparse matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(CpuMatrix& out,
|
||||||
|
const CpuSparseMatrix& a,
|
||||||
|
const CpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// CPU, dense matrix (+)= dense matrix * sparse matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(CpuMatrix& out,
|
||||||
|
const CpuMatrix& a,
|
||||||
|
const CpuSparseMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// CPU, sparse matrix (+)= dense matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(CpuSparseMatrix& out,
|
||||||
|
const CpuMatrix& a,
|
||||||
|
const CpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// GPU, dense matrix (+)= dense matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(GpuMatrix& out,
|
||||||
|
const GpuMatrix& a,
|
||||||
|
const GpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// GPU, dense matrix (+)= sparse matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(GpuMatrix& out,
|
||||||
|
const GpuSparseMatrix& a,
|
||||||
|
const GpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// GPU, dense matrix (+)= dense matrix * sparse matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(GpuMatrix& out,
|
||||||
|
const GpuMatrix& a,
|
||||||
|
const GpuSparseMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
/// GPU, sparse matrix (+)= dense matrix * dense matrix
|
||||||
|
template <DeviceType DType>
|
||||||
|
void MulOp(GpuSparseMatrix& out,
|
||||||
|
const GpuMatrix& a,
|
||||||
|
const GpuMatrix& b,
|
||||||
|
real scaleAB,
|
||||||
|
real scaleT,
|
||||||
|
bool aTrans,
|
||||||
|
bool bTrans);
|
||||||
|
|
||||||
|
} // namespace paddle
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in new issue