@ -13,4 +13,3 @@ Check the logs for the distributed training progress and analyze the performance
## Enable verbose logs
Edit `pserver.yaml` and `trainer.yaml` and add the environment variable `GLOG_v=3` to see what happened in detail.
@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""VGG16 benchmark in Fluid"""
from __future__ import print_function
@ -1,13 +1,13 @@
apiVersion: extensions/v1beta1
kind: ReplicaSet
metadata:
name: vgg16job-pserver
name: vgg16v2job-pserver
spec:
replicas: 10
template:
labels:
paddle-job-pserver: vgg16job
paddle-job-pserver: vgg16v2job
hostNetwork: true
imagePullSecrets:
@ -21,7 +21,7 @@ spec:
containerPort: 30236
env:
- name: PADDLE_JOB_NAME
value: vgg16job
value: vgg16v2job
- name: TRAINERS
value: "20"
- name: PSERVERS
@ -1,14 +1,14 @@
apiVersion: batch/v1
kind: Job
name: vgg16job-trainer
name: vgg16v2job-trainer
parallelism: 20
completions: 20
paddle-job: vgg16job
paddle-job: vgg16v2job
- name: job-registry-secret
@ -20,7 +20,7 @@ spec:
command: ["paddle_k8s", "start_trainer", "v2"]